library(MASS)
library(tidyverse)
## ── Attaching packages ──────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ─────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## x dplyr::select() masks MASS::select()
library(faraway)
# Convert the categorical predictors (and the dichotomized outcome `low`)
# to factors so lm() dummy-codes them instead of treating the integer
# codes as numeric.
factor_cols <- c("race", "smoke", "ht", "ui", "low")
birthwt[factor_cols] <- lapply(birthwt[factor_cols], as.factor)

str(birthwt)
## 'data.frame':    189 obs. of  10 variables:
##  $ low  : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ age  : int  19 33 20 21 18 21 22 17 29 26 ...
##  $ lwt  : int  182 155 105 108 107 124 118 103 123 113 ...
##  $ race : Factor w/ 3 levels "1","2","3": 2 3 1 1 1 3 1 3 1 1 ...
##  $ smoke: Factor w/ 2 levels "0","1": 1 1 2 2 2 1 1 1 2 2 ...
##  $ ptl  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ ht   : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ui   : Factor w/ 2 levels "0","1": 2 1 1 2 2 1 1 1 1 1 ...
##  $ ftv  : int  0 3 1 2 0 0 1 1 1 0 ...
##  $ bwt  : int  2523 2551 2557 2594 2600 2622 2637 2637 2663 2665 ...
# Fit the full main-effects model: regress birth weight (bwt) on every
# other covariate except `low` -- `low` is bwt dichotomized at 2.5 kg,
# so including it would leak the response into the predictors.
# NOTE(review): the model object is named `data`, which shadows
# utils::data(); kept unchanged because later chunks reference it.
data <- lm(bwt ~ . - low, data = birthwt)
summary(data)
## 
## Call:
## lm(formula = bwt ~ . - low, data = birthwt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1825.26  -435.21    55.91   473.46  1701.20 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2927.962    312.904   9.357  < 2e-16 ***
## age           -3.570      9.620  -0.371 0.711012    
## lwt            4.354      1.736   2.509 0.013007 *  
## race2       -488.428    149.985  -3.257 0.001349 ** 
## race3       -355.077    114.753  -3.094 0.002290 ** 
## smoke1      -352.045    106.476  -3.306 0.001142 ** 
## ptl          -48.402    101.972  -0.475 0.635607    
## ht1         -592.827    202.321  -2.930 0.003830 ** 
## ui1         -516.081    138.885  -3.716 0.000271 ***
## ftv          -14.058     46.468  -0.303 0.762598    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 650.3 on 179 degrees of freedom
## Multiple R-squared:  0.2427, Adjusted R-squared:  0.2047 
## F-statistic: 6.376 on 9 and 179 DF,  p-value: 7.891e-08
# Standard lm diagnostics (residuals vs fitted, QQ, scale-location,
# residuals vs leverage) for the full model, arranged in a 2x2 grid.
op <- par(mfrow = c(2, 2))
plot(data)
# Fix: restore the previous layout so the Box-Cox profile below gets the
# whole plotting device instead of being squeezed into one grid cell.
par(op)

# Box-Cox profile log-likelihood for a power transformation of bwt.
boxcox(data)

According to the result of the Box–Cox test, a power transformation of the response is needed.

# Refit with the Box-Cox-suggested power transformation bwt^0.75.
# Fix: exclude `low` from the predictors, as in the original model --
# `low` is just bwt dichotomized at 2.5 kg, so leaving it in leaks the
# response into the right-hand side. Also use the idiomatic `^` and `<-`.
data1 <- lm(bwt^0.75 ~ . - low, data = birthwt)
boxcox(data1)

Examine the leverages (hat values) of the transformed model.

# Leverages (hat values) of the transformed model, printed per observation.
print(hatvalues(data1))
##         85         86         87         88         89         91         92 
## 0.10151143 0.05949858 0.02852123 0.07061658 0.06515470 0.02275111 0.02259750 
##         93         94         95         96         97         98         99 
## 0.03019561 0.02783550 0.02921955 0.02890067 0.02928460 0.13026917 0.08182866 
##        100        101        102        103        104        105        106 
## 0.03439723 0.03439723 0.07307742 0.04595158 0.05696185 0.02637079 0.04246221 
##        107        108        109        111        112        113        114 
## 0.09849311 0.06678299 0.03019218 0.06549612 0.03356273 0.03205304 0.02573978 
##        115        116        117        118        119        120        121 
## 0.07087820 0.06036656 0.06036656 0.04606921 0.11547257 0.02113067 0.05786978 
##        123        124        125        126        127        128        129 
## 0.03029432 0.03441020 0.02860527 0.07336066 0.04597405 0.07413828 0.05608641 
##        130        131        132        133        134        135        136 
## 0.04958996 0.03197525 0.07089599 0.07089599 0.06830974 0.02565823 0.02845036 
##        137        138        139        140        141        142        143 
## 0.05379660 0.12133830 0.02276768 0.02465689 0.04798461 0.02466120 0.03078490 
##        144        145        146        147        148        149        150 
## 0.07827936 0.03979365 0.02559940 0.02805135 0.02805135 0.02822763 0.02458800 
##        151        154        155        156        159        160        161 
## 0.02693957 0.12190375 0.08834664 0.02882695 0.27004975 0.13926042 0.07866124 
##        162        163        164        166        167        168        169 
## 0.10438100 0.06804326 0.04428909 0.06340012 0.03570631 0.10492236 0.01877466 
##        170        172        173        174        175        176        177 
## 0.08700092 0.06503266 0.04818728 0.02076987 0.04342688 0.03835624 0.02371245 
##        179        180        181        182        183        184        185 
## 0.02258678 0.04835921 0.02604953 0.02388468 0.06101136 0.02130620 0.02359497 
##        186        187        188        189        190        191        192 
## 0.02975356 0.15586430 0.21280047 0.03570631 0.02221384 0.02338152 0.03092362 
##        193        195        196        197        199        200        201 
## 0.03092362 0.02403167 0.02383630 0.11673562 0.04324437 0.02417440 0.02339243 
##        202        203        204        205        206        207        208 
## 0.15259125 0.03013264 0.03484924 0.03769252 0.11220629 0.04301548 0.02559923 
##        209        210        211        212        213        214        215 
## 0.03109657 0.07796053 0.03973379 0.02525909 0.04614975 0.02983426 0.02649949 
##        216        217        218        219        220        221        222 
## 0.03519743 0.02898324 0.03424116 0.02465004 0.02456667 0.02438621 0.03324546 
##        223        224        225        226          4         10         11 
## 0.06335831 0.02807062 0.02182825 0.11080832 0.07636888 0.08251882 0.16500629 
##         13         15         16         17         18         19         20 
## 0.12647578 0.06912345 0.05079843 0.06121590 0.07674788 0.10424299 0.10657225 
##         22         23         24         25         26         27         28 
## 0.06251285 0.10662449 0.03848029 0.04511469 0.04603669 0.05014406 0.12398414 
##         29         30         31         32         33         34         35 
## 0.05652843 0.03605258 0.06217451 0.11151147 0.05972453 0.06461689 0.04618386 
##         36         37         40         42         43         44         45 
## 0.04910076 0.07416104 0.08901728 0.05908187 0.10349972 0.07496805 0.04372815 
##         46         47         49         50         51         52         54 
## 0.04775607 0.03593942 0.04940834 0.07560449 0.06043360 0.10894160 0.04302394 
##         56         57         59         60         61         62         63 
## 0.05452813 0.06209328 0.07899048 0.06428578 0.07521412 0.06688152 0.03656224 
##         65         67         68         69         71         75         76 
## 0.05729137 0.03543501 0.07088423 0.04502221 0.07366324 0.11982509 0.06368804 
##         77         78         79         81         82         83         84 
## 0.07482489 0.06492055 0.05288170 0.06026733 0.05139049 0.12683565 0.14186581
# Spike plot of the leverages to spot high-leverage observations.
leverage_vals <- hatvalues(data1)
plot(leverage_vals, type = "h")

Examine internally Studentized residuals of the transformed model

# Internally Studentized (standardized) residuals of the transformed model.
print(rstandard(data1))
##            85            86            87            88            89 
## -1.2249828366 -1.6456905061 -1.8067504409 -0.8736954398 -0.9391407005 
##            91            92            93            94            95 
## -1.6336263130 -1.9942297593 -1.6141186923 -1.4087024179 -1.4468588966 
##            96            97            98            99           100 
## -1.3449933858 -1.5004005364 -0.7750809196 -0.4162043504 -1.3395518477 
##           101           102           103           104           105 
## -1.3395518477 -1.2391079755 -1.1937500490 -0.3697103116 -1.0448202740 
##           106           107           108           109           111 
## -0.8609883070 -0.4195572479 -1.5287490524 -0.8935130592 -0.0706531661 
##           112           113           114           115           116 
## -1.4750848658 -1.1131311824 -1.2708798680 -0.4911455771 -0.8887123610 
##           117           118           119           120           121 
## -0.8887123610 -0.9376241054 -0.2452280710 -1.2512123108 -0.6182075527 
##           123           124           125           126           127 
## -0.7188702422 -0.9413549307 -0.8533991686 -0.8867547270 -0.4022912245 
##           128           129           130           131           132 
## -0.3605644987 -1.3228336325 -0.4711192995 -1.1771345458  0.2323138820 
##           133           134           135           136           137 
##  0.2323138820 -0.7701058593 -0.6089539236 -0.8706843037  0.0072974752 
##           138           139           140           141           142 
## -0.4671848046 -0.4715230782 -0.4995873724 -0.1488245761 -0.3551312362 
##           143           144           145           146           147 
## -0.4072364552  1.0267708816 -0.1737837875 -0.2273392727 -0.3006241975 
##           148           149           150           151           154 
## -0.3006241975 -0.1319403940 -0.0945018024 -0.5516521873 -0.0685705233 
##           155           156           159           160           161 
##  0.3753011494  0.0002030361  0.1288800053 -0.0908988242 -0.1919574320 
##           162           163           164           166           167 
## -0.4204612953  0.5434690830  0.4895578076  0.1440538081 -0.1032444065 
##           168           169           170           172           173 
## -0.1594916814 -0.1987305643  0.2366063829  0.7602856171 -0.3324357423 
##           174           175           176           177           179 
## -0.1374781931 -0.0244927687  0.7489526816  0.3312524001  0.5396950258 
##           180           181           182           183           184 
##  0.8736724618  0.5741204000  0.1608586292  0.3383404012  0.2253255668 
##           185           186           187           188           189 
##  0.2352085118  0.6638278413  0.7136813461  1.2091380220  0.5002725172 
##           190           191           192           193           195 
##  0.4313144483  0.3662836799  0.5436084319  0.5436084319  0.5533095136 
##           196           197           199           200           201 
##  0.5738271949  1.1758874436  0.9220688135  0.6433840891  0.9808825183 
##           202           203           204           205           206 
##  1.3928157546  0.8612693970  0.5372495681  1.0853758622  1.1012937991 
##           207           208           209           210           211 
##  0.7986336007  1.1937944239  1.3585472034  2.0533318450  1.1263409456 
##           212           213           214           215           216 
##  1.4959936871  0.7259209447  1.5658498636  1.1272246473  1.4867434618 
##           217           218           219           220           221 
##  0.9061754624  1.6044518535  1.1975381918  1.2862245225  1.4570188645 
##           222           223           224           225           226 
##  1.6649137499  1.4181425206  1.9100107218  2.4000177605  3.8487117734 
##             4            10            11            13            15 
## -3.0711737887 -2.7178669572 -1.9709092641 -1.9679676837 -0.8485293236 
##            16            17            18            19            20 
## -1.5557813833 -0.6059216149 -1.3416141377 -0.7181919390 -0.7964088226 
##            22            23            24            25            26 
## -0.7285564421 -0.3465422061 -0.6534449866 -0.8899870421 -0.5472450476 
##            27            28            29            30            31 
## -0.8487455640  0.0194914672 -0.9691126700 -0.5017602441  0.4704416320 
##            32            33            34            35            36 
## -0.5454625942 -0.6989650113  0.5052187734 -0.4021094436 -0.6759598910 
##            37            40            42            43            44 
##  0.7663859257  0.3428895136  0.5863132194  1.0838405877  1.4338196327 
##            45            46            47            49            50 
## -0.0292466172  0.0853388606  0.1431764100  0.0683379317  0.5424336585 
##            51            52            54            56            57 
##  0.8407934738  0.1958252015  0.5393048314  0.4555593915 -0.1456222945 
##            59            60            61            62            63 
##  0.7755656830  0.9549596862  1.1137518007  1.2248904616  0.5965149426 
##            65            67            68            69            71 
##  0.4273956173  0.4843154892  0.4394189218  0.4100918090  0.6609296023 
##            75            76            77            78            79 
##  0.9583623332  0.7132860039  0.5078510932  0.8005092208  0.8961047776 
##            81            82            83            84 
##  0.6941609047  1.2936836709  1.2304267574  1.2324694009
# Spike plot of the internally Studentized residuals.
std_res <- rstandard(data1)
plot(std_res, type = "h")

Examine the mean-shift outlier t-statistics (externally Studentized residuals) of the transformed model.

# Externally Studentized residuals (mean-shift outlier t-statistics).
print(rstudent(data1))
##            85            86            87            88            89 
## -1.2267187385 -1.6536900642 -1.8184193289 -0.8731119415 -0.9388277709 
##            91            92            93            94            95 
## -1.6413820133 -2.0112147291 -1.6214888443 -1.4126363113 -1.4513486251 
##            96            97            98            99           100 
## -1.3480777182 -1.5057319367 -0.7742082410 -0.4152356891 -1.3425680463 
##           101           102           103           104           105 
## -1.3425680463 -1.2409862610 -1.1951859422 -0.3688119689 -1.0450908795 
##           106           107           108           109           111 
## -0.8603597955 -0.4185840813 -1.5345561263 -0.8930045587 -0.0704554106 
##           112           113           114           115           116 
## -1.4800091774 -1.1138836574 -1.2730940123 -0.4900962111 -0.8881851504 
##           117           118           119           120           121 
## -0.8881851504 -0.9373041500 -0.2445795766 -1.2532159862 -0.6171314407 
##           123           124           125           126           127 
## -0.7178909574 -0.9410523250 -0.8527449021 -0.8862199869 -0.4013420937 
##           128           129           130           131           132 
## -0.3596816258 -1.3256447662 -0.4700872410 -1.1784190093  0.2316955236 
##           133           134           135           136           137 
##  0.2316955236 -0.7692221158 -0.6078744880 -0.8700899236  0.0072769489 
##           138           139           140           141           142 
## -0.4661565251 -0.4704906388 -0.4985316981 -0.1484151748 -0.3542577947 
##           143           144           145           146           147 
## -0.4062802299  1.0269282935 -0.1733096464 -0.2267326987 -0.2998546896 
##           148           149           150           151           154 
## -0.2998546896 -0.1315756870 -0.0942383381 -0.5505712672 -0.0683785413 
##           155           156           159           160           161 
##  0.3743936056  0.0002024649  0.1285234696 -0.0906452346 -0.1914372820 
##           162           163           164           166           167 
## -0.4194869274  0.5423905203  0.4885096944  0.1436569667 -0.1029570684 
##           168           169           170           172           173 
## -0.1590544055 -0.1981935345  0.2359779337  0.7593809785 -0.3316035749 
##           174           175           176           177           179 
## -0.1370987533 -0.0244239131  0.7480254725  0.3304224655  0.5386177579 
##           180           181           182           183           184 
##  0.8730888800  0.5730362410  0.1604178030  0.3374972102  0.2247237900 
##           185           186           187           188           189 
##  0.2345833407  0.6627814490  0.7126942055  1.2107191961  0.4992163570 
##           190           191           192           193           195 
##  0.4303261154  0.3653910728  0.5425298239  0.5425298239  0.5522281917 
##           196           197           199           200           201 
##  0.5727430471  1.1771607671  0.9216789032  0.6423215865  0.9807776043 
##           202           203           204           205           206 
##  1.3965287335  0.8606418555  0.5361732086  1.0859221533  1.1019565670 
##           207           208           209           210           211 
##  0.7978177547  1.1952307291  1.3618042352  2.0722449776  1.1271966788 
##           212           213           214           215           216 
##  1.5012530322  0.7249528600  1.5723117782  1.1280874077  1.4918531962 
##           217           218           219           220           221 
##  0.9057179958  1.6116349279  1.1990093973  1.2886087379  1.4616627015 
##           222           223           224           225           226 
##  1.6733105301  1.4222105908  1.9244611451  2.4329556248  4.0082858587 
##             4            10            11            13            15 
## -3.1470496769 -2.7682704290 -1.9871677306 -1.9841358683 -0.8478589706 
##            16            17            18            19            20 
## -1.5620619183 -0.6048412824 -1.3446560777 -0.7172116087 -0.7955872905 
##            22            23            24            25            26 
## -0.7275927004 -0.3456840331 -0.6523898352 -0.8894647653 -0.5461653202 
##            27            28            29            30            31 
## -0.8480759179  0.0194366595 -0.9689462099 -0.5007030427  0.4694102156 
##            32            33            34            35            36 
## -0.5443834003 -0.6979573522  0.5041592241 -0.4011605768 -0.6749252681 
##            37            40            42            43            44 
##  0.7654941199  0.3420379649  0.5852293388  1.0843758943  1.4381153646 
##            45            46            47            49            50 
## -0.0291644182  0.0851005480  0.1427818846  0.0681465948  0.5413554381 
##            51            52            54            56            57 
##  0.8400982684  0.1952953939  0.5382277048  0.4545429864 -0.1452213177 
##            59            60            61            62            63 
##  0.7746940999  0.9547222434  1.1145090523  1.2266254462  0.5954324248 
##            65            67            68            69            71 
##  0.4264122293  0.4832716596  0.4384207177  0.4091315637  0.6598806441 
##            75            76            77            78            79 
##  0.9581416568  0.7122982781  0.5067898251  0.7996982207  0.8956065232 
##            81            82            83            84 
##  0.6931471046  1.2961524370  1.2322170352  1.2342802427
# Bonferroni outlier test: flag observations whose externally Studentized
# residual exceeds the upper-tail t critical value at level 0.05 / (2 * n),
# with n - p - 1 residual degrees of freedom.
# Fix: spell out `lower.tail` in full -- `lower` relied on partial
# argument matching.
critval <- qt(0.05 / (2 * nobs(data1)), df = df.residual(data1) - 1,
              lower.tail = FALSE)
which(abs(rstudent(data1)) > critval)
## 226 
## 130

Examine Cook’s distances to check for influential observations.

# Cook's distances of the transformed model, printed per observation.
print(cooks.distance(data1))
##           85           86           87           88           89           91 
## 1.541238e-02 1.557582e-02 8.712415e-03 5.272775e-03 5.588235e-03 5.648202e-03 
##           92           93           94           95           96           97 
## 8.358814e-03 7.374591e-03 5.165413e-03 5.728120e-03 4.894315e-03 6.174035e-03 
##           98           99          100          101          102          103 
## 8.180092e-03 1.403467e-03 5.811006e-03 5.811006e-03 1.100437e-02 6.239706e-03 
##          104          105          106          107          108          109 
## 7.505597e-04 2.687943e-03 2.988467e-03 1.748343e-03 1.520418e-02 2.259529e-03 
##          111          112          113          114          115          116 
## 3.180570e-05 6.869498e-03 3.730076e-03 3.879236e-03 1.672892e-03 4.612833e-03 
##          117          118          119          120          121          123 
## 4.612833e-03 3.859746e-03 7.136992e-04 3.072260e-03 2.134110e-03 1.467674e-03 
##          124          125          126          127          128          129 
## 2.870838e-03 1.949674e-03 5.659354e-03 7.089914e-04 9.463889e-04 9.452423e-03 
##          130          131          132          133          134          135 
## 1.052815e-03 4.160898e-03 3.743820e-04 3.743820e-04 3.952931e-03 8.877516e-04 
##          136          137          138          139          140          141 
## 2.018141e-03 2.752472e-07 2.740065e-03 4.709059e-04 5.736027e-04 1.014880e-04 
##          142          143          144          145          146          147 
## 2.898970e-04 4.788707e-04 8.139587e-03 1.137824e-04 1.234379e-04 2.371186e-04 
##          148          149          150          151          154          155 
## 2.371186e-04 4.596981e-05 2.046551e-05 7.659295e-04 5.934132e-05 1.240873e-03 
##          156          159          160          161          162          163 
## 1.112387e-10 5.586360e-04 1.215290e-04 2.859951e-04 1.873085e-03 1.960408e-03 
##          164          166          167          168          169          170 
## 1.009684e-03 1.277006e-04 3.588195e-05 2.710758e-04 6.869733e-05 4.849687e-04 
##          172          173          174          175          176          177 
## 3.655072e-03 5.086320e-04 3.644382e-05 2.475845e-06 2.033939e-03 2.422836e-04 
##          179          180          181          182          183          184 
## 6.118997e-04 3.526231e-03 8.014496e-04 5.755915e-05 6.761848e-04 1.004818e-04 
##          185          186          187          188          189          190 
## 1.215354e-04 1.228500e-03 8.549691e-03 3.592914e-02 8.424735e-04 3.842156e-04 
##          191          192          193          195          196          197 
## 2.920050e-04 8.572588e-04 8.572588e-04 6.853175e-04 7.309469e-04 1.661313e-02 
##          199          200          201          202          203          204 
## 3.493514e-03 9.322479e-04 2.095060e-03 3.175641e-02 2.095127e-03 9.474530e-04 
##          205          206          207          208          209          210 
## 4.194778e-03 1.393538e-02 2.606288e-03 3.403734e-03 5.385038e-03 3.240790e-02 
##          211          212          213          214          215          216 
## 4.772164e-03 5.272242e-03 2.317795e-03 6.854517e-03 3.144340e-03 7.330808e-03 
##          217          218          219          220          221          222 
## 2.228190e-03 8.297371e-03 3.294907e-03 3.787823e-03 4.823965e-03 8.665765e-03 
##          223          224          225          226            4           10 
## 1.236736e-02 9.578472e-03 1.168531e-02 1.678089e-01 7.089799e-02 6.039758e-02 
##           11           13           15           16           17           18 
## 6.978433e-02 5.097716e-02 4.860426e-03 1.177596e-02 2.176400e-03 1.360218e-02 
##           19           20           22           23           24           25 
## 5.456887e-03 6.878031e-03 3.217642e-03 1.302994e-03 1.553479e-03 3.402056e-03 
##           26           27           28           29           30           31 
## 1.313843e-03 3.457197e-03 4.888218e-06 5.115569e-03 8.560178e-04 1.333857e-03 
##           32           33           34           35           36           37 
## 3.394728e-03 2.821083e-03 1.602961e-03 7.117400e-04 2.144879e-03 4.277034e-03 
##           40           42           43           44           45           46 
## 1.044431e-03 1.962317e-03 1.232897e-02 1.514662e-02 3.555808e-06 3.320331e-05 
##           47           49           50           51           52           54 
## 6.947315e-05 2.206669e-05 2.187713e-03 4.133681e-03 4.262183e-04 1.188735e-03 
##           56           57           59           60           61           62 
## 1.088101e-03 1.276286e-04 4.689808e-03 5.695726e-03 9.171542e-03 9.776224e-03 
##           63           65           67           68           69           71 
## 1.227606e-03 1.009205e-03 7.833667e-04 1.339197e-03 7.207805e-04 3.157909e-03 
##           75           76           77           78           79           81 
## 1.136699e-02 3.146097e-03 1.896279e-03 4.044582e-03 4.075924e-03 2.809346e-03 
##           82           83           84 
## 8.242498e-03 1.999237e-02 2.282875e-02
# Flag influential points by the conventional Cook's distance cutoff
# D_i >= 1. Fix: compute the distances once instead of twice.
cooks_d <- cooks.distance(data1)
which(cooks_d >= 1)
## named integer(0)
plot(cooks_d, type = "h")

Shows the interaction between race and smoking status during pregnancy

# Does smoking's effect on birth weight differ by race? Compare the
# additive model against the model with the race x smoke interaction.
lmod.main  <- lm(bwt ~ race + smoke, data = birthwt)
lmod.inter <- lm(bwt ~ race * smoke, data = birthwt)
anova(lmod.main, lmod.inter)
## Analysis of Variance Table
## 
## Model 1: bwt ~ race + smoke
## Model 2: bwt ~ race * smoke
##   Res.Df      RSS Df Sum of Sq      F Pr(>F)
## 1    185 87631356                           
## 2    183 85529548  2   2101808 2.2485 0.1085

Shows the interaction between age and race during pregnancy

# Test the age x race interaction against the additive model via a
# partial F-test.
lmod.main1  <- lm(bwt ~ age + race, data = birthwt)
lmod.inter1 <- lm(bwt ~ age * race, data = birthwt)
anova(lmod.main1, lmod.inter1)
## Analysis of Variance Table
## 
## Model 1: bwt ~ age + race
## Model 2: bwt ~ age * race
##   Res.Df      RSS Df Sum of Sq      F Pr(>F)
## 1    185 94754346                           
## 2    183 92431148  2   2323199 2.2998 0.1032

Shows the interaction between the number of physician visits during the first trimester (ftv) and history of hypertension (ht) during pregnancy

# Test the ftv x ht interaction (first-trimester physician visits by
# history of hypertension) against the additive model.
lmod.main2  <- lm(bwt ~ ftv + ht, data = birthwt)
lmod.inter2 <- lm(bwt ~ ftv * ht, data = birthwt)
anova(lmod.main2, lmod.inter2)
## Analysis of Variance Table
## 
## Model 1: bwt ~ ftv + ht
## Model 2: bwt ~ ftv * ht
##   Res.Df      RSS Df Sum of Sq      F Pr(>F)
## 1    186 97610068                           
## 2    185 97552954  1     57114 0.1083 0.7424

Shows the interaction between mother’s age and mother’s weight in pounds at last menstrual period during pregnancy

# Test the lwt x age interaction (mother's weight at last menstrual
# period by mother's age) against the additive model.
lmod.main3  <- lm(bwt ~ lwt + age, data = birthwt)
lmod.inter3 <- lm(bwt ~ lwt * age, data = birthwt)
anova(lmod.main3, lmod.inter3)
## Analysis of Variance Table
## 
## Model 1: bwt ~ lwt + age
## Model 2: bwt ~ lwt * age
##   Res.Df      RSS Df Sum of Sq      F Pr(>F)
## 1    186 96186834                           
## 2    185 95741853  1    444981 0.8598  0.355

Shows the interaction between smoking status and presence of uterine irritability during pregnancy

# Test the smoke x ui interaction (smoking status by presence of uterine
# irritability) against the additive model.
lmod.main4  <- lm(bwt ~ smoke + ui, data = birthwt)
lmod.inter4 <- lm(bwt ~ smoke * ui, data = birthwt)
anova(lmod.main4, lmod.inter4)
## Analysis of Variance Table
## 
## Model 1: bwt ~ smoke + ui
## Model 2: bwt ~ smoke * ui
##   Res.Df      RSS Df Sum of Sq      F Pr(>F)
## 1    186 88913988                           
## 2    185 88471981  1    442007 0.9243 0.3376

Shows the interaction between age and smoking status during pregnancy

# Test the age x smoke interaction against the additive model; this is
# the one interaction that turns out significant at the 5% level.
lmod.main5  <- lm(bwt ~ age + smoke, data = birthwt)
lmod.inter5 <- lm(bwt ~ age * smoke, data = birthwt)
anova(lmod.main5, lmod.inter5)
## Analysis of Variance Table
## 
## Model 1: bwt ~ age + smoke
## Model 2: bwt ~ age * smoke
##   Res.Df      RSS Df Sum of Sq      F  Pr(>F)  
## 1    186 95672288                              
## 2    185 93062605  1   2609683 5.1878 0.02389 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Box-Cox profile for the age x smoke interaction model; the next chunk
# uses the suggested power (lambda = 1.25).
boxcox(lmod.inter5)

# Diagnostics in a 2x2 grid. Fix: restore the layout afterwards so the
# next boxcox() call is not squeezed into one cell of the grid.
op <- par(mfrow = c(2, 2))
plot(lmod.inter5)
par(op)

# Transform the response with the Box-Cox-suggested power (lambda = 1.25).
lmod.inter5_trans <- lm(bwt^1.25 ~ age * smoke, data = birthwt)

# Re-check the profile; it should now peak near lambda = 1.
boxcox(lmod.inter5_trans)

op <- par(mfrow = c(2, 2))
# Fix: the original re-plotted the untransformed lmod.inter5 here, which
# is why the diagnostics "didn't seem to change" -- plot the transformed
# model instead.
plot(lmod.inter5_trans)
par(op)

Examine the leverages (hat values) of the transformed interaction model.

# Leverages (hat values) of the transformed interaction model.
print(hatvalues(lmod.inter5_trans))
##          85          86          87          88          89          91 
## 0.014443758 0.035590176 0.018179969 0.015549613 0.026666860 0.010422673 
##          92          93          94          95          96          97 
## 0.009292381 0.020812178 0.033220950 0.018528745 0.014443758 0.014443758 
##          98          99         100         101         102         103 
## 0.009292381 0.021376048 0.026666860 0.026666860 0.029527931 0.015782131 
##         104         105         106         107         108         109 
## 0.012139799 0.027248154 0.030265300 0.025527257 0.055085805 0.014834130 
##         111         112         113         114         115         116 
## 0.009422505 0.014834130 0.032523397 0.017811672 0.018528745 0.020812178 
##         117         118         119         120         121         123 
## 0.020812178 0.014110911 0.091640993 0.009422505 0.009422505 0.033220950 
##         124         125         126         127         128         129 
## 0.021885718 0.022350753 0.048392722 0.067866070 0.015549613 0.014443758 
##         130         131         132         133         134         135 
## 0.008748922 0.010422673 0.026666860 0.026666860 0.030265300 0.014443758 
##         136         137         138         139         140         141 
## 0.008792297 0.013994652 0.009292381 0.008748922 0.013994652 0.040269139 
##         142         143         144         145         146         147 
## 0.014443758 0.024876637 0.015549613 0.021376048 0.012139799 0.020812178 
##         148         149         150         151         154         155 
## 0.020812178 0.008748922 0.008792297 0.014834130 0.018528745 0.012139799 
##         156         159         160         161         162         163 
## 0.008792297 0.027248154 0.012139799 0.009292381 0.013994652 0.048392722 
##         164         166         167         168         169         170 
## 0.013515085 0.024876637 0.039455328 0.017334551 0.009422505 0.057591699 
##         172         173         174         175         176         177 
## 0.018179969 0.008748922 0.009292381 0.030265300 0.021376048 0.012139799 
##         179         180         181         182         183         184 
## 0.008748922 0.032523397 0.014443758 0.008748922 0.055085805 0.009292381 
##         185         186         187         188         189         190 
## 0.008792297 0.010422673 0.021885718 0.015782131 0.039455328 0.017811672 
##         191         192         193         195         196         197 
## 0.017811672 0.021885718 0.021885718 0.021376048 0.008792297 0.021885718 
##         199         200         201         202         203         204 
## 0.008792297 0.008748922 0.012139799 0.009422505 0.021376048 0.009292381 
##         205         206         207         208         209         210 
## 0.026666860 0.024876637 0.030265300 0.017334551 0.033220950 0.035590176 
##         211         212         213         214         215         216 
## 0.018179969 0.014834130 0.034766058 0.014834130 0.009422505 0.024876637 
##         217         218         219         220         221         222 
## 0.012139799 0.010639546 0.010422673 0.009292381 0.009422505 0.025527257 
##         223         224         225         226           4          10 
## 0.048000429 0.021885718 0.008792297 0.145261702 0.027248154 0.017811672 
##          11          13          15          16          17          18 
## 0.079215834 0.009422505 0.009422505 0.012443422 0.008748922 0.008792297 
##          19          20          22          23          24          25 
## 0.008792297 0.015549613 0.057591699 0.021885718 0.009422505 0.024876637 
##          26          27          28          29          30          31 
## 0.015782131 0.018179969 0.010422673 0.014110911 0.010422673 0.012139799 
##          32          33          34          35          36          37 
## 0.009422505 0.014443758 0.021885718 0.018528745 0.008792297 0.032523397 
##          40          42          43          44          45          46 
## 0.018179969 0.013994652 0.012443422 0.018179969 0.032523397 0.009422505 
##          47          49          50          51          52          54 
## 0.012139799 0.017334551 0.026666860 0.018179969 0.010422673 0.010639546 
##          56          57          59          60          61          62 
## 0.048392722 0.029527931 0.013515085 0.018179969 0.014110911 0.029527931 
##          63          65          67          68          69          71 
## 0.008748922 0.040269139 0.013994652 0.032523397 0.013515085 0.020812178 
##          75          76          77          78          79          81 
## 0.010639546 0.012139799 0.018528745 0.056545370 0.027248154 0.034766058 
##          82          83          84 
## 0.013515085 0.020812178 0.015549613
# Spike plot of the leverage (hat) values, one bar per observation
leverage_vals <- hatvalues(lmod.inter5_trans)
plot(leverage_vals, type = "h")

Examine internally Studentized residuals of the transformed model

# Internally Studentized (standardized) residuals of the transformed model
print(rstandard(lmod.inter5_trans))
##           85           86           87           88           89           91 
## -0.604869201 -1.169845743 -0.406716082 -0.329039793 -0.399548196 -0.550291522 
##           92           93           94           95           96           97 
## -0.571141358 -0.362651937 -0.028251578 -0.101968251 -0.326312631 -0.310762472 
##           98           99          100          101          102          103 
## -0.410902763 -0.753893853 -0.160106347 -0.160106347 -0.078667045  0.038074502 
##          104          105          106          107          108          109 
## -0.247689132  0.171253509 -0.720637963 -0.676413676 -0.901100982 -0.505991642 
##          111          112          113          114          115          116 
## -0.358243073 -0.485942978  0.011053129 -0.467158328  0.261371640  0.040540631 
##          117          118          119          120          121          123 
##  0.040540631  0.250040697  0.552998569 -0.214727444 -0.214727444  0.423633413 
##          124          125          126          127          128          129 
##  0.164914687  0.290401596  0.520178973  0.619831753  0.309697331  0.161364681 
##          130          131          132          133          134          135 
## -0.007536536  0.076746797  0.263656173  0.263656173 -0.364359790  0.202154834 
##          136          137          138          139          140          141 
## -0.008973175  0.404829044  0.089818549  0.053507314  0.466173560  0.701728605 
##          142          143          144          145          146          147 
##  0.326546188  0.455656564  0.545249580 -0.097174592  0.325095397  0.486143585 
##          148          149          150          151          154          155 
##  0.486143585  0.240812841  0.198707933  0.033310552  0.758227413  0.429744776 
##          156          159          160          161          162          163 
##  0.260577790  0.877091130  0.493401190  0.408450025  0.738806436  0.991906069 
##          164          166          167          168          169          170 
##  0.784928072  0.751688603  0.679275882  0.706061522  0.429243988  1.189041478 
##          172          173          174          175          176          177 
##  0.879013414  0.577537840  0.621313340  0.221928841  0.412946938  0.747070535 
##          179          180          181          182          183          184 
##  0.705073116  1.002694407  0.918332145  0.768374554  0.247960981  0.853021862 
##          185          186          187          188          189          190 
##  0.768586299  0.918369645  1.135176875  1.297085876  1.089475848  0.616877337 
##          191          192          193          195          196          197 
##  0.616877337  1.168730915  1.168730915  0.648984647  0.941670348  1.329568137 
##          199          200          201          202          203          204 
##  1.005774638  1.047861888  1.176203158  0.994569469  0.802591372  1.177576457 
##          205          206          207          208          209          210 
##  1.461746906  1.492783793  0.815744025  1.439546514  1.793841371  0.856489251 
##          211          212          213          214          215          216 
##  1.636651073  1.103528710  1.712286237  1.146972881  1.291897659  1.706080944 
##          217          218          219          220          221          222 
##  1.526320846  1.360909158  1.571484281  1.617361234  1.556799947  1.336908846 
##          223          224          225          226            4           10 
##  1.191918344  2.082018914  2.296281952  2.214276417 -2.490890016 -2.927261576 
##           11           13           15           16           17           18 
## -1.927483403 -2.393506876 -2.221592911 -2.170019390 -1.997534646 -1.899332761 
##           19           20           22           23           24           25 
## -1.864186225 -1.404639117 -1.111951954 -1.338303832 -1.698168747 -1.321681841 
##           26           27           28           29           30           31 
## -1.125252185 -1.254511812 -1.485275172 -1.139449135 -1.430797629 -1.278624318 
##           32           33           34           35           36           37 
## -1.487487239 -1.202268542 -1.077521227 -0.896604133 -1.385644948 -1.080134626 
##           40           42           43           44           45           46 
## -0.994153172 -0.859699592 -1.399088691 -0.880474411 -0.945306931 -1.241774707 
##           47           49           50           51           52           54 
## -1.032574079 -0.893977708 -0.820405639 -0.765697574 -0.991622148 -1.169982978 
##           56           57           59           60           61           62 
## -0.413186795 -0.674794116 -0.590871866 -0.649853307 -0.546413081 -0.636294419 
##           63           65           67           68           69           71 
## -0.926891241 -0.358372434 -0.557748848 -0.686360856 -0.512903496 -0.639819509 
##           75           76           77           78           79           81 
## -1.010378194 -0.747063803 -0.379574075 -0.700251786 -0.329879022 -0.437009026 
##           82           83           84 
## -0.415142929 -0.560987603 -0.466657862
# Spike plot of the standardized residuals
std_resid <- rstandard(lmod.inter5_trans)
plot(std_resid, type = "h")

Examine mean-shift t-statistics of the transformed model

# Externally Studentized residuals (mean-shift t-statistics)
print(rstudent(lmod.inter5_trans))
##           85           86           87           88           89           91 
## -0.603829584 -1.171019065 -0.405796822 -0.328245354 -0.398638905 -0.549251943 
##           92           93           94           95           96           97 
## -0.570098478 -0.361799093 -0.028175179 -0.101695146 -0.325523204 -0.310002360 
##           98           99          100          101          102          103 
## -0.409977837 -0.753011129 -0.159684104 -0.159684104 -0.078455456  0.037971607 
##          104          105          106          107          108          109 
## -0.247059763  0.170803574 -0.719698507 -0.675418782 -0.900640948 -0.504971789 
##          111          112          113          114          115          116 
## -0.357397525 -0.484937433  0.011023218 -0.466169069  0.260712414  0.040431092 
##          117          118          119          120          121          123 
##  0.040431092  0.249406142  0.551958339 -0.214173004 -0.214173004  0.422691979 
##          124          125          126          127          128          129 
##  0.164480458  0.289681697  0.519150981  0.618797127  0.308939272  0.160939296 
##          130          131          132          133          134          135 
## -0.007516140  0.076540310  0.262992038  0.262992038 -0.363504150  0.201630000 
##          136          137          138          139          140          141 
## -0.008948892  0.403912376  0.089577421  0.053362916  0.465185229  0.700762719 
##          142          143          144          145          146          147 
##  0.325756330  0.454678602  0.544211389 -0.096914075  0.324308219  0.485137881 
##          148          149          150          151          154          155 
##  0.485137881  0.240198763  0.198191308  0.033220501  0.757353070  0.428795811 
##          156          159          160          161          162          163 
##  0.259920276  0.876541770  0.492389945  0.407528404  0.737896326  0.991862616 
##          164          166          167          168          169          170 
##  0.784110535  0.750801703  0.678283909  0.705101327  0.428295630  1.190380826 
##          172          173          174          175          176          177 
##  0.878470898  0.576494748  0.620279330  0.221357689  0.412019287  0.746175083 
##          179          180          181          182          183          184 
##  0.704111605  1.002709110  0.917941439  0.767520744  0.247331012  0.852391248 
##          185          186          187          188          189          190 
##  0.767732931  0.917979095  1.136068256  1.299497948  1.090029765  0.615841549 
##          191          192          193          195          196          197 
##  0.615841549  1.169894814  1.169894814  0.647966275  0.941380670  1.332350701 
##          199          200          201          202          203          204 
##  1.005806296  1.048141091  1.177430674  0.994540197  0.801816410  1.178815782 
##          205          206          207          208          209          210 
##  1.466283080  1.497791874  0.815003413  1.443759550  1.804751243  0.855869839 
##          211          212          213          214          215          216 
##  1.644168106  1.104182339  1.721346798  1.147957636  1.294252667  1.715008769 
##          217          218          219          220          221          222 
##  1.531865809  1.364071218  1.577797653  1.624510052  1.562857678  1.339778336 
##          223          224          225          226            4           10 
##  1.193283194  2.101146491  2.323418021  2.238141453 -2.526883453 -2.989391697 
##           11           13           15           16           17           18 
## -1.941864193 -2.424869769 -2.245739163 -2.192226802 -2.013965939 -1.912935230 
##           19           20           22           23           24           25 
## -1.876852792 -1.408367851 -1.112667064 -1.341189975 -1.706928926 -1.324372341 
##           26           27           28           29           30           31 
## -1.126067020 -1.256472483 -1.490166909 -1.140374032 -1.434886573 -1.280835949 
##           32           33           34           35           36           37 
## -1.492413105 -1.203726489 -1.077993105 -0.896126729 -1.389122150 -1.080624221 
##           40           42           43           44           45           46 
## -0.994121675 -0.859090702 -1.402743058 -0.879937132 -0.945033745 -1.243607690 
##           47           49           50           51           52           54 
## -1.032759906 -0.893490297 -0.819677753 -0.764838223 -0.991577190 -1.171157462 
##           56           57           59           60           61           62 
## -0.412258826 -0.673797608 -0.589829574 -0.648835555 -0.545374549 -0.635267893 
##           63           65           67           68           69           71 
## -0.926536625 -0.357526670 -0.556707634 -0.685376504 -0.511879467 -0.638795080 
##           75           76           77           78           79           81 
## -1.010435483 -0.746168339 -0.378694300 -0.699284009 -0.329083050 -0.436051448 
##           82           83           84 
## -0.414212383 -0.559945836 -0.465669073
# Bonferroni-corrected critical value for the outlier (mean-shift) t-test:
# alpha = 0.05 split across n two-sided tests.
# Fixed: the original used the partially-matched argument name `lower=`;
# the full argument name of qt() is `lower.tail`.
critval <- qt(0.05 / (2 * nobs(lmod.inter5_trans)),
              df = df.residual(lmod.inter5_trans) - 1,
              lower.tail = FALSE)
# Observations whose |studentized residual| exceeds the cutoff (none here)
which(abs(rstudent(lmod.inter5_trans)) > critval)
## named integer(0)

Examine Cook’s distances to check for influential observations

# Cook's distances: influence of each observation on the fitted model
print(cooks.distance(lmod.inter5_trans))
##           85           86           87           88           89           91 
## 1.340484e-03 1.262600e-02 7.657446e-04 4.275261e-04 1.093424e-03 7.973611e-04 
##           92           93           94           95           96           97 
## 7.649072e-04 6.988300e-04 6.856622e-06 4.907252e-05 3.901275e-04 3.538311e-04 
##           98           99          100          101          102          103 
## 3.959129e-04 3.103645e-03 1.755769e-04 1.755769e-04 4.707341e-05 5.811428e-06 
##          104          105          106          107          108          109 
## 1.884820e-04 2.053780e-04 4.051971e-03 2.996396e-03 1.183407e-02 9.637834e-04 
##          111          112          113          114          115          116 
## 3.051923e-04 8.889214e-04 1.026753e-06 9.894142e-04 3.224225e-04 8.733182e-06 
##          117          118          119          120          121          123 
## 8.733182e-06 2.237115e-04 7.712946e-03 1.096461e-04 1.096461e-04 1.541719e-03 
##          124          125          126          127          128          129 
## 1.521353e-04 4.820001e-04 3.440075e-03 6.992976e-03 3.787396e-04 9.540163e-05 
##          130          131          132          133          134          135 
## 1.253298e-07 1.550922e-05 4.761308e-04 4.761308e-04 1.035841e-03 1.497294e-04 
##          136          137          138          139          140          141 
## 1.785541e-07 5.815220e-04 1.891706e-05 6.317383e-06 7.711132e-04 5.165368e-03 
##          142          143          144          145          146          147 
## 3.906862e-04 1.324181e-03 1.173969e-03 5.156524e-05 3.246965e-04 1.255800e-03 
##          148          149          150          151          154          155 
## 1.255800e-03 1.279588e-04 8.756047e-05 4.176922e-06 2.713360e-03 5.673842e-04 
##          156          159          160          161          162          163 
## 1.505749e-04 5.387217e-03 7.479222e-04 3.912005e-04 1.936797e-03 1.250845e-02 
##          164          166          167          168          169          170 
## 2.110222e-03 3.603695e-03 4.738277e-03 2.198528e-03 4.381536e-04 2.160005e-02 
##          172          173          174          175          176          177 
## 3.576780e-03 7.359898e-04 9.051965e-04 3.842904e-04 9.311938e-04 1.714665e-03 
##          179          180          181          182          183          184 
## 1.096931e-03 8.449531e-03 3.089857e-03 1.302737e-03 8.960950e-04 1.706247e-03 
##          185          186          187          188          189          190 
## 1.309975e-03 2.220774e-03 7.208390e-03 6.744533e-03 1.218887e-02 1.725232e-03 
##          191          192          193          195          196          197 
## 1.725232e-03 7.640825e-03 7.640825e-03 2.299961e-03 1.966416e-03 9.888545e-03 
##          199          200          201          202          203          204 
## 2.243257e-03 2.422808e-03 4.250311e-03 2.352275e-03 3.517552e-03 3.251620e-03 
##          205          206          207          208          209          210 
## 1.463507e-02 1.421235e-02 5.192062e-03 9.138993e-03 2.764349e-02 6.767875e-03 
##          211          212          213          214          215          216 
## 1.239976e-02 4.584163e-03 2.640071e-02 4.952210e-03 3.968936e-03 1.856399e-02 
##          217          218          219          220          221          222 
## 7.157275e-03 4.979283e-03 6.502636e-03 6.133884e-03 5.763463e-03 1.170518e-02 
##          223          224          225          226            4           10 
## 1.790776e-02 2.424826e-02 1.169306e-02 2.083155e-01 4.344944e-02 3.884839e-02 
##           11           13           15           16           17           18 
## 7.990528e-02 1.362346e-02 1.173672e-02 1.483355e-02 8.804395e-03 7.999812e-03 
##           19           20           22           23           24           25 
## 7.706484e-03 7.791037e-03 1.889004e-02 1.001891e-02 6.857718e-03 1.114105e-02 
##           26           27           28           29           30           31 
## 5.075913e-03 7.285356e-03 5.808757e-03 4.645761e-03 5.390460e-03 5.022754e-03 
##           32           33           34           35           36           37 
## 5.261680e-03 5.295924e-03 6.494756e-03 3.794110e-03 4.257764e-03 9.805082e-03 
##           40           42           43           44           45           46 
## 4.575177e-03 2.622505e-03 6.166068e-03 3.588680e-03 7.510021e-03 3.666938e-03 
##           47           49           50           51           52           54 
## 3.275657e-03 3.524523e-03 4.610071e-03 2.714037e-03 2.589178e-03 3.680168e-03 
##           56           57           59           60           61           62 
## 2.170477e-03 3.463638e-03 1.195790e-03 1.954933e-03 1.068339e-03 3.079684e-03 
##           63           65           67           68           69           71 
## 1.895695e-03 1.347200e-03 1.103825e-03 3.959136e-03 9.010308e-04 2.175237e-03 
##           75           76           77           78           79           81 
## 2.744584e-03 1.714634e-03 6.799884e-04 7.347244e-03 7.620517e-04 1.719664e-03 
##           82           83           84 
## 5.902875e-04 1.672238e-03 8.599297e-04
# No observation reaches the conventional Cook's distance cutoff of 1
print(which(cooks.distance(lmod.inter5_trans) >= 1))
## named integer(0)
# Spike plot of the Cook's distances
cooks_d <- cooks.distance(lmod.inter5_trans)
plot(cooks_d, type = "h")


# Fit a simple one-way model of birth weight on each categorical
# predictor separately, to examine each factor's marginal effect.
# Fixed: use `<-` for assignment, consistent with the rest of the file.
lmod.race <- lm(bwt ~ race, data = birthwt)
lmod.smoke <- lm(bwt ~ smoke, data = birthwt)
lmod.ht <- lm(bwt ~ ht, data = birthwt)
lmod.ui <- lm(bwt ~ ui, data = birthwt)


# One-way ANOVA table: F-test for the race effect on birth weight
anova(lmod.race)
## Analysis of Variance Table
## 
## Response: bwt
##            Df   Sum Sq Mean Sq F value   Pr(>F)   
## race        2  5015725 2507863  4.9125 0.008336 **
## Residuals 186 94953931  510505                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# One-way ANOVA table: F-test for the smoking effect on birth weight
anova(lmod.smoke)
## Analysis of Variance Table
## 
## Response: bwt
##            Df   Sum Sq Mean Sq F value   Pr(>F)   
## smoke       1  3625946 3625946  7.0378 0.008667 **
## Residuals 187 96343710  515207                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# One-way ANOVA table: F-test for the hypertension effect on birth weight
anova(lmod.ht)
## Analysis of Variance Table
## 
## Response: bwt
##            Df   Sum Sq Mean Sq F value  Pr(>F)  
## ht          1  2130425 2130425  4.0719 0.04503 *
## Residuals 187 97839231  523204                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# One-way ANOVA table: F-test for the uterine irritability effect on birth weight
anova(lmod.ui)
## Analysis of Variance Table
## 
## Response: bwt
##            Df   Sum Sq Mean Sq F value    Pr(>F)    
## ui          1  8059031 8059031  16.397 7.518e-05 ***
## Residuals 187 91910625  491501                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Box-Cox profile and standard diagnostic plots for each one-factor model.
# Fixed: the boxcox(lmod.race)/plot(lmod.race) pair was run twice in the
# original; the duplicate has been removed.
boxcox(lmod.race)

par(mfrow = c(2, 2))
plot(lmod.race)

boxcox(lmod.smoke)

par(mfrow = c(2, 2))
plot(lmod.smoke)

boxcox(lmod.ht)

par(mfrow = c(2, 2))
plot(lmod.ht)

boxcox(lmod.ui)

par(mfrow = c(2, 2))
plot(lmod.ui)

# Tukey HSD pairwise comparisons of mean birth weight across race groups
aov_race <- aov(bwt ~ race, data = birthwt)
TukeyHSD(aov_race)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = bwt ~ race, data = birthwt)
## 
## $race
##           diff       lwr        upr     p adj
## 2-1 -383.02644 -756.2363  -9.816581 0.0428037
## 3-1 -297.43517 -566.1652 -28.705095 0.0260124
## 3-2   85.59127 -304.4521 475.634630 0.8624372
# Group means of birth weight by race
tapply(birthwt$bwt, birthwt$race, mean)
##        1        2        3 
## 3102.719 2719.692 2805.284
# Tukey HSD comparison of mean birth weight by smoking status
aov_smoke <- aov(bwt ~ smoke, data = birthwt)
TukeyHSD(aov_smoke)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = bwt ~ smoke, data = birthwt)
## 
## $smoke
##          diff       lwr       upr     p adj
## 1-0 -283.7767 -494.7973 -72.75612 0.0086667
# Group means of birth weight by smoking status
tapply(birthwt$bwt, birthwt$smoke, mean)
##        0        1 
## 3055.696 2771.919
# Tukey HSD comparison of mean birth weight by hypertension status
aov_ht <- aov(bwt ~ ht, data = birthwt)
TukeyHSD(aov_ht)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = bwt ~ ht, data = birthwt)
## 
## $ht
##          diff       lwr       upr    p adj
## 1-0 -435.3983 -861.0528 -9.743799 0.045032
# Group means of birth weight by hypertension status
tapply(birthwt$bwt, birthwt$ht, mean)
##        0        1 
## 2972.232 2536.833
# Tukey HSD comparison of mean birth weight by uterine irritability
aov_ui <- aov(bwt ~ ui, data = birthwt)
TukeyHSD(aov_ui)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = bwt ~ ui, data = birthwt)
## 
## $ui
##          diff       lwr       upr    p adj
## 1-0 -581.2733 -864.4574 -298.0892 7.52e-05
# Group means of birth weight by uterine irritability
tapply(birthwt$bwt, birthwt$ui, mean)
##        0        1 
## 3030.702 2449.429

hsb dataset

# Preview the first six rows of the hsb dataset
head(hsb, n = 6L)
##    id gender  race    ses schtyp     prog read write math science socst
## 1  70   male white    low public  general   57    52   41      47    57
## 2 121 female white middle public vocation   68    59   53      63    61
## 3  86   male white   high public  general   44    33   54      58    31
## 4 141   male white   high public vocation   63    44   47      53    56
## 5 172   male white middle public academic   47    52   57      53    61
## 6 113   male white middle public academic   44    52   51      63    61
# Structure of hsb: 200 obs. of 11 variables (factors and integer scores)
str(hsb)
## 'data.frame':    200 obs. of  11 variables:
##  $ id     : int  70 121 86 141 172 113 50 11 84 48 ...
##  $ gender : Factor w/ 2 levels "female","male": 2 1 2 2 2 2 2 2 2 2 ...
##  $ race   : Factor w/ 4 levels "african-amer",..: 4 4 4 4 4 4 1 3 4 1 ...
##  $ ses    : Factor w/ 3 levels "high","low","middle": 2 3 1 1 3 3 3 3 3 3 ...
##  $ schtyp : Factor w/ 2 levels "private","public": 2 2 2 2 2 2 2 2 2 2 ...
##  $ prog   : Factor w/ 3 levels "academic","general",..: 2 3 2 3 1 1 2 1 2 1 ...
##  $ read   : int  57 68 44 63 47 44 50 34 63 57 ...
##  $ write  : int  52 59 33 44 52 52 59 46 57 55 ...
##  $ math   : int  41 53 54 47 57 51 42 45 54 52 ...
##  $ science: int  47 63 58 53 53 63 53 39 58 50 ...
##  $ socst  : int  57 61 31 56 61 61 61 36 51 51 ...
# Scatterplot matrix of every pair of hsb variables
pairs(x = hsb, col = "dodgerblue")

# Regress the social studies score on every other variable in hsb
# (formula deparses to `socst ~ .`, so the printed Call is unchanged)
mod_hsb <- lm(socst ~ ., data = hsb)
summary(mod_hsb)
## 
## Call:
## lm(formula = socst ~ ., data = hsb)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -24.6461  -4.7879   0.8008   4.9662  15.8981 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  13.73934    5.47198   2.511  0.01290 *  
## id            0.03210    0.02056   1.561  0.12019    
## gendermale   -0.59257    1.22689  -0.483  0.62968    
## raceasian    -5.80606    3.00871  -1.930  0.05517 .  
## racehispanic -0.27575    2.43386  -0.113  0.90992    
## racewhite    -4.82208    2.51164  -1.920  0.05641 .  
## seslow       -4.91105    1.64123  -2.992  0.00315 ** 
## sesmiddle    -1.02520    1.31772  -0.778  0.43756    
## schtyppublic  3.11868    1.99826   1.561  0.12030    
## proggeneral  -0.66574    1.50878  -0.441  0.65955    
## progvocation -4.16415    1.57032  -2.652  0.00870 ** 
## read          0.35546    0.08307   4.279 3.01e-05 ***
## write         0.36521    0.08830   4.136 5.35e-05 ***
## math          0.07380    0.09147   0.807  0.42083    
## science      -0.03770    0.08560  -0.440  0.66011    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.583 on 185 degrees of freedom
## Multiple R-squared:  0.5362, Adjusted R-squared:  0.5011 
## F-statistic: 15.27 on 14 and 185 DF,  p-value: < 2.2e-16
# 2x2 panel of the four standard lm diagnostic plots for mod_hsb
par(mfrow = c(2, 2))
plot(mod_hsb)

Based on the scale-location plot, the red trend line departs from horizontal at the upper end of the fitted values. The unequally spread points indicate a possible violation of homoscedasticity.

# Check variance inflation factors against the rule-of-thumb cutoff of 5
vif(mod_hsb) > 5
##           id   gendermale    raceasian racehispanic    racewhite       seslow 
##        FALSE        FALSE        FALSE        FALSE        FALSE        FALSE 
##    sesmiddle schtyppublic  proggeneral progvocation         read        write 
##        FALSE        FALSE        FALSE        FALSE        FALSE        FALSE 
##         math      science 
##        FALSE        FALSE

The VIFs for all predictor variables are smaller than 5, which indicates there is no multicollinearity issue.

# Box-Cox log-likelihood profile for mod_hsb; motivates the ^(3/2)
# response transformation fitted below
boxcox(mod_hsb) #see if we need transformation 

# Refit using the Box-Cox-suggested 3/2 power transformation of the
# response. The literal `(socst)^(3/2)` is kept so the echoed Call in
# summary() output stays identical.
mod_hsb_trans <- lm((socst)^(3/2) ~ ., data = hsb)
summary(mod_hsb_trans)
## 
## Call:
## lm(formula = (socst)^(3/2) ~ ., data = hsb)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -247.267  -55.816    6.701   50.998  173.586 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -29.5501    57.7631  -0.512  0.60956    
## id             0.3288     0.2171   1.515  0.13150    
## gendermale    -5.6820    12.9513  -0.439  0.66138    
## raceasian    -62.8003    31.7604  -1.977  0.04949 *  
## racehispanic  -5.1399    25.6922  -0.200  0.84166    
## racewhite    -50.3229    26.5133  -1.898  0.05925 .  
## seslow       -51.4834    17.3251  -2.972  0.00336 ** 
## sesmiddle    -12.7946    13.9101  -0.920  0.35887    
## schtyppublic  32.5109    21.0939   1.541  0.12497    
## proggeneral   -8.7874    15.9270  -0.552  0.58180    
## progvocation -41.7468    16.5766  -2.518  0.01264 *  
## read           3.8658     0.8769   4.409 1.76e-05 ***
## write          3.7858     0.9321   4.062 7.19e-05 ***
## math           0.7996     0.9656   0.828  0.40868    
## science       -0.2980     0.9036  -0.330  0.74193    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 80.05 on 185 degrees of freedom
## Multiple R-squared:  0.5415, Adjusted R-squared:  0.5068 
## F-statistic: 15.61 on 14 and 185 DF,  p-value: < 2.2e-16
# 2x2 panel of diagnostic plots for the transformed model
par(mfrow = c(2, 2))
plot(mod_hsb_trans)

Based on the residuals-vs-fitted plot, the model has a linear relationship and the errors are not correlated. The normal Q-Q plot looks fine, which indicates no non-normality of the errors, and the residuals-vs-leverage plot looks good. The scale-location plot, especially the red line, still looks somewhat off at the upper end, indicating that the average magnitude of the standardized residuals changes noticeably as a function of the fitted values. The spread around the red line varies with the fitted values, indicating a possible violation of homoscedasticity.

Examine Leverages of the transformed model

# Leverage (hat) values for each observation in the transformed model
print(hatvalues(mod_hsb_trans))
##          1          2          3          4          5          6          7 
## 0.08760006 0.06654660 0.10707364 0.07988727 0.04774977 0.06529547 0.12317064 
##          8          9         10         11         12         13         14 
## 0.08848586 0.05364112 0.08470966 0.05240767 0.08816233 0.04966763 0.04554697 
##         15         16         17         18         19         20         21 
## 0.12203656 0.05805764 0.05747295 0.07135441 0.04691042 0.06946711 0.14959859 
##         22         23         24         25         26         27         28 
## 0.09652573 0.11229457 0.07550299 0.07184258 0.09295706 0.04638736 0.08592565 
##         29         30         31         32         33         34         35 
## 0.08512408 0.14666987 0.09348278 0.05784220 0.06144604 0.11022622 0.07019808 
##         36         37         38         39         40         41         42 
## 0.06982927 0.08575335 0.04715196 0.09457706 0.05726386 0.06900212 0.05202528 
##         43         44         45         46         47         48         49 
## 0.04160831 0.08857416 0.09620138 0.08348230 0.11191593 0.04102732 0.06966431 
##         50         51         52         53         54         55         56 
## 0.07209880 0.11977638 0.08766678 0.10730625 0.06719178 0.05214204 0.12028562 
##         57         58         59         60         61         62         63 
## 0.09865139 0.08848403 0.05185511 0.07471770 0.07158932 0.07844548 0.08679082 
##         64         65         66         67         68         69         70 
## 0.07407182 0.08957437 0.04149122 0.04790914 0.06535677 0.09876185 0.08049563 
##         71         72         73         74         75         76         77 
## 0.06830948 0.03072163 0.06887855 0.07853779 0.06985366 0.07875184 0.06019573 
##         78         79         80         81         82         83         84 
## 0.06241211 0.06533582 0.07897227 0.03482619 0.10148211 0.06517921 0.12746560 
##         85         86         87         88         89         90         91 
## 0.07126038 0.04203767 0.04895278 0.08080355 0.07937829 0.04582378 0.13428799 
##         92         93         94         95         96         97         98 
## 0.08745787 0.04775667 0.08282564 0.05266734 0.08147288 0.07034849 0.04895356 
##         99        100        101        102        103        104        105 
## 0.07416944 0.06187093 0.04857761 0.07308365 0.07748209 0.04622431 0.05486199 
##        106        107        108        109        110        111        112 
## 0.08324026 0.04767739 0.09638832 0.12999437 0.06831722 0.09590640 0.07451838 
##        113        114        115        116        117        118        119 
## 0.05677262 0.05574984 0.12869202 0.04436096 0.06253553 0.04465007 0.06320879 
##        120        121        122        123        124        125        126 
## 0.07342890 0.13711394 0.04473522 0.05560759 0.04667800 0.11808045 0.04139177 
##        127        128        129        130        131        132        133 
## 0.05689552 0.12319817 0.06145225 0.08621516 0.03029201 0.13328840 0.07712424 
##        134        135        136        137        138        139        140 
## 0.05284149 0.06502459 0.06529938 0.05584540 0.09376271 0.07789263 0.11189291 
##        141        142        143        144        145        146        147 
## 0.09832406 0.05478070 0.06151496 0.07478608 0.02949791 0.06945005 0.06757228 
##        148        149        150        151        152        153        154 
## 0.05897733 0.07765690 0.07924103 0.09675952 0.04056916 0.05448368 0.08705666 
##        155        156        157        158        159        160        161 
## 0.05489755 0.04887893 0.03330984 0.05062614 0.05913023 0.09998584 0.05383809 
##        162        163        164        165        166        167        168 
## 0.04729133 0.07957406 0.07675927 0.03944349 0.06560886 0.05051502 0.05984618 
##        169        170        171        172        173        174        175 
## 0.04731782 0.06038065 0.05291020 0.07627798 0.13199544 0.12143042 0.07931966 
##        176        177        178        179        180        181        182 
## 0.05435546 0.05505856 0.07517485 0.07328618 0.08333103 0.04832971 0.06019467 
##        183        184        185        186        187        188        189 
## 0.06840350 0.05422377 0.17280982 0.12743313 0.05680168 0.13925401 0.05526570 
##        190        191        192        193        194        195        196 
## 0.09084360 0.05930646 0.10210514 0.06757852 0.16049308 0.06338056 0.17238198 
##        197        198        199        200 
## 0.05614329 0.10793944 0.04537972 0.04522051
# Spike plot of the leverages, one bar per observation
hsb_leverage <- hatvalues(mod_hsb_trans)
plot(hsb_leverage, type = "h")

Examine internally Studentized residuals of the transformed model

# Internally Studentized residuals of the transformed hsb model
print(rstandard(mod_hsb_trans))
##            1            2            3            4            5            6 
##  1.107000432  0.371045020 -1.517239062  0.463396106  0.990416926  1.500687398 
##            7            8            9           10           11           12 
##  0.664944301 -1.267976453 -0.950450782 -1.192223564  1.646027863  0.446079468 
##           13           14           15           16           17           18 
##  0.924103545 -1.861930306  0.537003471  1.637409563  0.549219670 -0.061613379 
##           19           20           21           22           23           24 
## -0.641787714 -0.406465135 -2.101440101  1.001234946  0.752874015  0.279306967 
##           25           26           27           28           29           30 
##  0.472915955 -1.104909208  0.084937757 -0.421054088 -0.109997030 -0.156937673 
##           31           32           33           34           35           36 
##  1.129816736 -0.349638957  1.096589276 -1.467905398  0.652378171  0.575627086 
##           37           38           39           40           41           42 
##  0.799041856  0.540314553 -0.153322827  1.711159690  0.321019807 -0.820032304 
##           43           44           45           46           47           48 
## -0.763513336 -0.323188446  0.797174763 -1.253519261 -0.142304516 -1.613806192 
##           49           50           51           52           53           54 
## -1.184378109 -0.977689629  0.278147515 -0.243193452 -0.476286845  1.897854629 
##           55           56           57           58           59           60 
##  0.049859827 -0.119630494 -0.021018462  0.558012723 -2.386055481  0.313286030 
##           61           62           63           64           65           66 
##  1.674034110  1.154287026  0.307401467 -0.302642379  1.514184679 -1.891916826 
##           67           68           69           70           71           72 
##  0.996387361 -1.430938934 -0.847267858 -0.935479687  0.337008461  0.407263585 
##           73           74           75           76           77           78 
##  0.075245743  0.632765309  0.659347075 -1.611253064  0.065141212  0.584809612 
##           79           80           81           82           83           84 
##  1.090003172  0.712636521 -0.836019576 -0.548973997 -0.815820381 -0.425396634 
##           85           86           87           88           89           90 
## -0.462362382  0.784700977  0.734536662  1.821189849 -1.250532745  0.159565683 
##           91           92           93           94           95           96 
## -1.236542560 -0.944905034 -0.412020226  0.492734776  0.898170456 -0.561177883 
##           97           98           99          100          101          102 
## -1.269390450  1.113173174  0.509345714 -0.342309944  0.435112087  0.906381074 
##          103          104          105          106          107          108 
## -1.165245406 -1.235271222 -1.012566706  0.007000562 -0.923294914  0.214262235 
##          109          110          111          112          113          114 
##  1.113223638  0.783014341  0.481896490  0.467611663  0.436837740 -0.783429461 
##          115          116          117          118          119          120 
##  0.232443450  0.086406526 -0.940077593 -0.193988931  0.175640124  1.109867824 
##          121          122          123          124          125          126 
## -0.185572302  0.605407285  0.441012158 -0.241527750 -0.724649954  0.228397303 
##          127          128          129          130          131          132 
##  0.943096602 -0.599695554 -0.495083916 -1.045927743 -0.392175491  0.156023620 
##          133          134          135          136          137          138 
## -1.080088166 -0.002035560  1.607997314  0.040970115 -1.141281063  0.672905647 
##          139          140          141          142          143          144 
## -0.016469461  1.167670596  1.535960265  0.011134988  0.628872021 -1.959022809 
##          145          146          147          148          149          150 
##  1.284006522  1.228408005 -2.551063985  0.113395935 -0.618587987 -1.449067177 
##          151          152          153          154          155          156 
##  0.035940372 -0.667565926  0.277919711  1.142463184  1.274928012  1.205105922 
##          157          158          159          160          161          162 
##  1.385249857  1.599191486 -0.456524000 -0.313238059  1.077942453  0.551784999 
##          163          164          165          166          167          168 
## -3.219641871  0.362269715  0.680342845 -0.855932813  0.569604905 -1.098272409 
##          169          170          171          172          173          174 
## -2.496010708 -1.792057999 -0.625964944  2.025696084 -1.211950882 -0.949016073 
##          175          176          177          178          179          180 
##  0.381867299  1.079884602  1.286365684 -1.111275870 -1.619328460 -0.355667462 
##          181          182          183          184          185          186 
## -1.185243229  0.142494857  0.269815767 -0.048887399  0.413188487  2.283733859 
##          187          188          189          190          191          192 
## -0.674494781 -1.079016150 -0.156480564  2.274219364 -0.318386627 -1.054654210 
##          193          194          195          196          197          198 
##  0.994000249  0.120991172 -0.102920319  0.793101249  0.129132679  0.134712098 
##          199          200 
##  0.385286606 -0.601720723
# Spike plot of the internally studentized residuals listed above.
plot(rstandard(mod_hsb_trans), type="h")

Examine mean-shift t-statistics of the transformed model

# Externally studentized residuals (mean-shift outlier t-statistics).
rstudent(mod_hsb_trans)
##            1            2            3            4            5            6 
##  1.107679244  0.370178603 -1.522635837  0.462410433  0.990365594  1.505819459 
##            7            8            9           10           11           12 
##  0.663938602 -1.270075761 -0.950201275 -1.193591129  1.653727567  0.445111661 
##           13           14           15           16           17           18 
##  0.923737053 -1.874538145  0.535968032  1.644941283  0.548180367 -0.061447262 
##           19           20           21           22           23           24 
## -0.640764512 -0.405546218 -2.121223126  1.001241670  0.751989353  0.278609810 
##           25           26           27           28           29           30 
##  0.471921415 -1.105572824  0.084709536 -0.420115910 -0.109702926 -0.156523362 
##           31           32           33           34           35           36 
##  1.130666548 -0.348807971  1.097193221 -1.472533329  0.651362267  0.574584018 
##           37           38           39           40           41           42 
##  0.798258013  0.539277937 -0.152917596  1.720196072  0.320240217 -0.819303388 
##           43           44           45           46           47           48 
## -0.762649529 -0.322404808  0.796386315 -1.255469861 -0.141927156 -1.620888254 
##           49           50           51           52           53           54 
## -1.185676464 -0.977572426  0.277452767 -0.242574059 -0.475289331  1.911416941 
##           55           56           57           58           59           60 
##  0.049725222 -0.119311345 -0.020961604  0.556971460 -2.417080657  0.312521076 
##           61           62           63           64           65           66 
##  1.682293925  1.155330996  0.306647853 -0.301898061  1.519532085 -1.905318529 
##           67           68           69           70           71           72 
##  0.996367833 -1.435029867 -0.846619023 -0.935162402  0.336199609  0.406343577 
##           73           74           75           76           77           78 
##  0.075043249  0.631736814  0.658336621 -1.618287394  0.064965661  0.583766747 
##           79           80           81           82           83           84 
##  1.090560766  0.711685376 -0.835336443 -0.547934758 -0.815079976 -0.424453001 
##           85           86           87           88           89           90 
## -0.461377713  0.783882918  0.733619303  1.832764510 -1.252453166  0.159144792 
##           91           92           93           94           95           96 
## -1.238324058 -0.944630016 -0.411093807  0.491724022  0.897699069 -0.560136086 
##           97           98           99          100          101          102 
## -1.271504537  1.113897305  0.508323786 -0.341491693  0.434156722  0.905941815 
##          103          104          105          106          107          108 
## -1.166379999 -1.237040295 -1.012636303  0.006981617 -0.922924999  0.213708881 
##          109          110          111          112          113          114 
##  1.113948142  0.782192432  0.480894223  0.466621979  0.435880361 -0.782608497 
##          115          116          117          118          119          120 
##  0.231848232  0.086174418 -0.939780756 -0.193483606  0.175179384  1.110567601 
##          121          122          123          124          125          126 
## -0.185087303  0.604367809  0.440049993 -0.240912074 -0.723716642  0.227811297 
##          127          128          129          130          131          132 
##  0.942813368 -0.598654726 -0.494071446 -1.046194911 -0.391276801  0.155611602 
##          133          134          135          136          137          138 
## -1.080577445 -0.002030051  1.614971032  0.040859421 -1.142220437  0.671907295 
##          139          140          141          142          143          144 
## -0.016424900  1.168825556  1.541664815  0.011104856  0.627841501 -1.974306500 
##          145          146          147          148          149          150 
##  1.286275840  1.230110568 -2.590125436  0.113092974 -0.617552861 -1.453417334 
##          151          152          153          154          155          156 
##  0.035843229 -0.666562570  0.277225438  1.143411933  1.277100380  1.206589760 
##          157          158          159          160          161          162 
##  1.388721915  1.606002696 -0.455545152 -0.312473196  1.078417178  0.550745052 
##          163          164          165          166          167          168 
## -3.304853359  0.361417499  0.679351984 -0.855311592  0.568562132 -1.098888324 
##          169          170          171          172          173          174 
## -2.532258980 -1.802925312 -0.624933012  2.042998517 -1.213497841 -0.948759920 
##          175          176          177          178          179          180 
##  0.380984008  1.080372497  1.288660481 -1.111985997 -1.626514365 -0.354826228 
##          181          182          183          184          185          186 
## -1.186549157  0.142117013  0.269138508 -0.048755407  0.412260516  2.310352287 
##          187          188          189          190          191          192 
## -0.673497979 -1.079498148 -0.156067399  2.300449460 -0.317611986 -1.054976186 
##          193          194          195          196          197          198 
##  0.993967936  0.120668500 -0.102644718  0.792302910  0.128789003  0.134354108 
##          199          200 
##  0.384398133 -0.600680337
# Bonferroni-corrected critical value for the mean-shift outlier test:
# alpha = 0.05 split over n two-sided tests. Spell out `lower.tail`
# rather than relying on partial argument matching (`lower`).
critval <- qt(0.05/(2*nobs(mod_hsb_trans)), df=df.residual(mod_hsb_trans)-1, lower.tail=FALSE)
# No studentized residual exceeds the critical value -> no declared outliers.
which(abs(rstudent(mod_hsb_trans)) > critval)
## named integer(0)

Examine Cook’s distances to check influences

# Cook's distances: overall influence of each case on the fitted model.
cooks.distance(mod_hsb_trans)
##            1            2            3            4            5            6 
## 7.843746e-03 6.543275e-04 1.840279e-02 1.242940e-03 3.279178e-03 1.048813e-02 
##            7            8            9           10           11           12 
## 4.140677e-03 1.040499e-02 3.413579e-03 8.769972e-03 9.989792e-03 1.282622e-03 
##           13           14           15           16           17           18 
## 2.975417e-03 1.102911e-02 2.672246e-03 1.101688e-02 1.226226e-03 1.944597e-05 
##           19           20           21           22           23           24 
## 1.351535e-03 8.222480e-04 5.179005e-02 7.140161e-03 4.780169e-03 4.247476e-04 
##           25           26           27           28           29           30 
## 1.154083e-03 8.340968e-03 2.339580e-05 1.111030e-03 7.505176e-05 2.822195e-04 
##           31           32           33           34           35           36 
## 8.775671e-03 5.003450e-04 5.248454e-03 1.779550e-02 2.142113e-03 1.658311e-03 
##           37           38           39           40           41           42 
## 3.992413e-03 9.631150e-04 1.637030e-04 1.185715e-02 5.091973e-04 2.460302e-03 
##           43           44           45           46           47           48 
## 1.687249e-03 6.767155e-04 4.509469e-03 9.541669e-03 1.701312e-04 7.428110e-03 
##           49           50           51           52           53           54 
## 7.002613e-03 4.951503e-03 7.018386e-04 3.788733e-04 1.817893e-03 1.729651e-02 
##           55           56           57           58           59           60 
## 9.117065e-06 1.304562e-04 3.223451e-06 2.015105e-03 2.075806e-02 5.283723e-04 
##           61           62           63           64           65           66 
## 1.440607e-02 7.561071e-03 5.987205e-04 4.884768e-04 1.503854e-02 1.032935e-02 
##           67           68           69           70           71           72 
## 3.330467e-03 9.545433e-03 5.244449e-03 5.107354e-03 5.551363e-04 3.504738e-04 
##           73           74           75           76           77           78 
## 2.792224e-05 2.275072e-03 2.176581e-03 1.479518e-02 1.811960e-05 1.517730e-03 
##           79           80           81           82           83           84 
## 5.536815e-03 2.902998e-03 1.681288e-03 2.269212e-03 3.093702e-03 1.762411e-03 
##           85           86           87           88           89           90 
## 1.093523e-03 1.801388e-03 1.851446e-03 1.943754e-02 8.989166e-03 8.151734e-05 
##           91           92           93           94           95           96 
## 1.581214e-02 5.704677e-03 5.675863e-04 1.461666e-03 2.989958e-03 1.862220e-03 
##           97           98           99          100          101          102 
## 8.128938e-03 4.252230e-03 1.385567e-03 5.151952e-04 6.444271e-04 4.318273e-03 
##          103          104          105          106          107          108 
## 7.602739e-03 4.930121e-03 3.967641e-03 2.966555e-07 2.845234e-03 3.264695e-04 
##          109          110          111          112          113          114 
## 1.234457e-02 2.997162e-03 1.642292e-03 1.173748e-03 7.657229e-04 2.415823e-03 
##          115          116          117          118          119          120 
## 5.320143e-04 2.310516e-05 3.930141e-03 1.172525e-04 1.387685e-04 6.507880e-03 
##          121          122          123          124          125          126 
## 3.648070e-04 1.144274e-03 7.634691e-04 1.904212e-04 4.687209e-03 1.501632e-04 
##          127          128          129          130          131          132 
## 3.577168e-03 3.368784e-03 1.069911e-03 6.881003e-03 3.202999e-04 2.495785e-04 
##          133          134          135          136          137          138 
## 6.499424e-03 1.541093e-08 1.198828e-02 7.817712e-06 5.136156e-03 3.123240e-03 
##          139          140          141          142          143          144 
## 1.527504e-06 1.145214e-02 1.715055e-02 4.790526e-07 1.728171e-03 2.068075e-02 
##          145          146          147          148          149          150 
## 3.340704e-03 7.508045e-03 3.144162e-02 5.372652e-05 2.147827e-03 1.204731e-02 
##          151          152          153          154          155          156 
## 9.224953e-06 1.256260e-03 2.967186e-04 8.297578e-03 6.294398e-03 4.975596e-03 
##          157          158          159          160          161          162 
## 4.408087e-03 9.091745e-03 8.732046e-04 7.266862e-04 4.407822e-03 1.007558e-03 
##          163          164          165          166          167          168 
## 5.974569e-02 7.274258e-04 1.267117e-03 3.429429e-03 1.150770e-03 5.118780e-03 
##          169          170          171          172          173          174 
## 2.062901e-02 1.375811e-02 1.459342e-03 2.258995e-02 1.489072e-02 8.298646e-03 
##          175          176          177          178          179          180 
## 8.375401e-04 4.468674e-03 6.427730e-03 6.692146e-03 1.382468e-02 7.666395e-04 
##          181          182          183          184          185          186 
## 4.756105e-03 8.670163e-05 3.563640e-04 9.134903e-06 2.377761e-03 5.077891e-02 
##          187          188          189          190          191          192 
## 1.826519e-03 1.255733e-02 9.549389e-05 3.445317e-02 4.260614e-04 8.432397e-03 
##          193          194          195          196          197          198 
## 4.773953e-03 1.865727e-04 4.778635e-05 8.734297e-03 6.612609e-05 1.463888e-04 
##          199          200 
## 4.704438e-04 1.143223e-03
# No case exceeds the conventional Cook's distance cutoff of 1.
which(cooks.distance(mod_hsb_trans) >= 1)
## named integer(0)
plot(cooks.distance(mod_hsb_trans), type="h")

Interaction

?hsb
# Test a gender:race interaction with a nested-model F-test.
mod_int1 <- lm(socst ~ gender * race, data = hsb)
mod_inta <- lm(socst ~ gender + race, data = hsb)
# BUG FIX: column 4 of anova() is "Sum of Sq", not the p-value; index the
# p-value by name. Reduced model listed first (conventional nesting order).
# Re-run to refresh the printed result below.
anova(mod_inta, mod_int1)[2, "Pr(>F)"] > 0.05
## [1] FALSE
# Test a gender:ses interaction (p-value indexed by name, not position 4,
# which is the "Sum of Sq" column of anova()). Re-run to refresh output.
mod_int2 <- lm(socst ~ gender * ses, data = hsb)
mod_intb <- lm(socst ~ gender + ses, data = hsb)
anova(mod_intb, mod_int2)[2, "Pr(>F)"] > 0.05
## [1] FALSE
# Test a ses:race interaction (p-value indexed by name, not position 4,
# which is the "Sum of Sq" column of anova()). Re-run to refresh output.
mod_int3 <- lm(socst ~ ses * race, data = hsb)
mod_intc <- lm(socst ~ ses + race, data = hsb)
anova(mod_intc, mod_int3)[2, "Pr(>F)"] > 0.05
## [1] FALSE
# Test a schtyp:race interaction (p-value indexed by name, not position 4,
# which is the "Sum of Sq" column of anova()). Re-run to refresh output.
mod_int4 <- lm(socst ~ schtyp * race, data = hsb)
mod_intd <- lm(socst ~ schtyp + race, data = hsb)
anova(mod_intd, mod_int4)[2, "Pr(>F)"] > 0.05
## [1] FALSE
# Test a schtyp:prog interaction (p-value indexed by name, not position 4,
# which is the "Sum of Sq" column of anova()). Re-run to refresh output.
mod_int5 <- lm(socst ~ schtyp * prog, data = hsb)
mod_inte <- lm(socst ~ schtyp + prog, data = hsb)
anova(mod_inte, mod_int5)[2, "Pr(>F)"] > 0.05
## [1] FALSE
# Test a ses:prog interaction (p-value indexed by name, not position 4,
# which is the "Sum of Sq" column of anova()). Re-run to refresh output.
mod_int6 <- lm(socst ~ ses * prog, data = hsb)
mod_intf <- lm(socst ~ ses + prog, data = hsb)
anova(mod_intf, mod_int6)[2, "Pr(>F)"] > 0.05
## [1] FALSE

No interaction effects between the variables above, as none of the interaction p-values are significant.

Use generalized and robust fitting methods (GLM, Huber regression) to address the error-variance issue.

# Fit the Box-Cox power transform of the response as a Gaussian GLM.
# BUG FIX: `socst ^ 3/2` parses as (socst^3)/2 because `^` binds tighter
# than `/`; the exponent 3/2 must be parenthesized. (The summary printed
# below was produced by the mis-parenthesized call; re-knit to refresh.)
mod_hsb_glm <- glm(socst^(3/2) ~ ., data = hsb)
summary(mod_hsb_glm)
## 
## Call:
## glm(formula = (socst)^3/2 ~ ., data = hsb)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -78673  -21777     975   19273   80918  
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -79314.46   22067.80  -3.594 0.000417 ***
## id              111.62      82.92   1.346 0.179949    
## gendermale    -1561.40    4947.90  -0.316 0.752686    
## raceasian    -24133.08   12133.74  -1.989 0.048184 *  
## racehispanic  -3566.47    9815.42  -0.363 0.716756    
## racewhite    -17852.81   10129.13  -1.763 0.079633 .  
## seslow       -18517.10    6618.87  -2.798 0.005693 ** 
## sesmiddle     -6707.77    5314.21  -1.262 0.208454    
## schtyppublic  11462.67    8058.72   1.422 0.156596    
## proggeneral   -4774.20    6084.73  -0.785 0.433680    
## progvocation -13202.52    6332.90  -2.085 0.038465 *  
## read           1545.92     335.00   4.615 7.34e-06 ***
## write          1324.04     356.09   3.718 0.000266 ***
## math            317.97     368.89   0.862 0.389822    
## science         -15.96     345.20  -0.046 0.963176    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 935287758)
## 
##     Null deviance: 3.7569e+11  on 199  degrees of freedom
## Residual deviance: 1.7303e+11  on 185  degrees of freedom
## AIC: 4715.3
## 
## Number of Fisher Scoring iterations: 2
# Standard 2x2 diagnostic panel for the GLM fit.
par(mfrow = c(2, 2))
plot(mod_hsb_glm)

library(MASS)
# Huber's method (robust regression).
# BUG FIX: `^ 3/2` parses as (socst^3)/2; parenthesize the exponent so the
# response is socst^(3/2), matching the intended power transform.
ghuber_hsb <- rlm(socst^(3/2) ~ ., data = hsb)
par(mfrow = c(2, 2))
plot(ghuber_hsb)

None of the remedies fix the scale-location plot.


Just Experiments

one categorical variable

library(tidyverse)
library(MASS)
# denim: 95 observations of waste by supplier; the na.action attribute
# records 15 rows dropped as NA.
str(denim)
## 'data.frame':    95 obs. of  2 variables:
##  $ waste   : num  1.2 16.4 12.1 11.5 24 10.1 -6 9.7 10.2 -3.7 ...
##  $ supplier: Factor w/ 5 levels "1","2","3","4",..: 1 2 3 4 5 1 2 3 4 5 ...
##  - attr(*, "na.action")= 'omit' Named int [1:15] 70 75 80 85 90 95 98 99 100 103 ...
##   ..- attr(*, "names")= chr [1:15] "70" "75" "80" "85" ...
# Bar chart of observation counts per supplier. Map bare column names in
# aes() (not denim$supplier); geom_bar() plots counts, so the y axis is
# labeled "count" (the previous "denim$waste" label was misleading).
ggplot(denim, aes(x = supplier)) + geom_bar() + labs(x = "supplier", y = "count")

par(mfrow = c(2, 2))
# plot() with a factor x gives per-supplier boxplots of waste.
plot(denim$supplier, denim$waste)
# One-way ANOVA model: waste explained by supplier.
mod_denim1 <- lm(waste ~ supplier, data = denim)
summary(mod_denim1)
## 
## Call:
## lm(formula = waste ~ supplier, data = denim)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -20.432  -4.377  -1.323   2.639  61.368 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   4.5227     2.1021   2.152   0.0341 *
## supplier2     4.3091     2.9728   1.450   0.1507  
## supplier3     0.3089     3.0879   0.100   0.9206  
## supplier4     2.9667     3.0879   0.961   0.3392  
## supplier5     5.8542     3.4491   1.697   0.0931 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.86 on 90 degrees of freedom
## Multiple R-squared:  0.04901,    Adjusted R-squared:  0.006747 
## F-statistic:  1.16 on 4 and 90 DF,  p-value: 0.334
par(mfrow = c(2, 2))

# Diagnostic panel for the one-way model.
plot(mod_denim1)

# Strip chart of waste by supplier; use the formula + data interface
# instead of `$`-qualified variables so the axis labels stay readable.
stripchart(waste ~ supplier, data = denim)
# Tukey HSD: pairwise supplier mean comparisons with family-wise 95% CIs.
TukeyHSD(aov(mod_denim1))
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = mod_denim1)
## 
## $supplier
##           diff        lwr       upr     p adj
## 2-1  4.3090909  -3.966713 12.584895 0.5976181
## 3-1  0.3088517  -8.287424  8.905127 0.9999769
## 4-1  2.9667464  -5.629529 11.563022 0.8718682
## 5-1  5.8541958  -3.747712 15.456104 0.4408168
## 3-2 -4.0002392 -12.596515  4.596036 0.6946720
## 4-2 -1.3423445  -9.938620  7.253931 0.9924515
## 5-2  1.5451049  -8.056803 11.147013 0.9915352
## 4-3  2.6578947  -6.247327 11.563116 0.9203538
## 5-3  5.5453441  -4.334112 15.424800 0.5251000
## 5-4  2.8874494  -6.992007 12.766906 0.9258057
min(denim$waste)
## [1] -11.6
# Shift waste above zero (minimum is -11.6) so Box-Cox applies.
boxcox(waste + 12 ~ supplier, data = denim)
# Square-root transform of the shifted response.
# BUG FIX: `^ 1/2` parses as ((waste + 12)^1)/2 — it merely halves the
# response (a linear rescale, which is why the Tukey p-values above were
# unchanged) — because `^` binds tighter than `/`. Parenthesize the
# exponent. (The summary below came from the mis-parenthesized fit;
# re-knit to refresh.)
mod_denim_trans <- lm((waste + 12)^(1/2) ~ ., data = denim)
summary(mod_denim_trans)
## 
## Call:
## lm(formula = (waste + 12)^1/2 ~ ., data = denim)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.2159  -2.1886  -0.6614   1.3197  30.6841 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   8.2614     1.0510   7.860 7.91e-12 ***
## supplier2     2.1545     1.4864   1.450   0.1507    
## supplier3     0.1544     1.5440   0.100   0.9206    
## supplier4     1.4834     1.5440   0.961   0.3392    
## supplier5     2.9271     1.7246   1.697   0.0931 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.93 on 90 degrees of freedom
## Multiple R-squared:  0.04901,    Adjusted R-squared:  0.006747 
## F-statistic:  1.16 on 4 and 90 DF,  p-value: 0.334
# Tukey pairwise comparisons for the transformed-response model.
TukeyHSD(aov(mod_denim_trans))
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = mod_denim_trans)
## 
## $supplier
##           diff       lwr      upr     p adj
## 2-1  2.1545455 -1.983356 6.292447 0.5976181
## 3-1  0.1544258 -4.143712 4.452564 0.9999769
## 4-1  1.4833732 -2.814765 5.781511 0.8718682
## 5-1  2.9270979 -1.873856 7.728052 0.4408168
## 3-2 -2.0001196 -6.298257 2.298018 0.6946720
## 4-2 -0.6711722 -4.969310 3.626965 0.9924515
## 5-2  0.7725524 -4.028402 5.573506 0.9915352
## 4-3  1.3289474 -3.123663 5.781558 0.9203538
## 5-3  2.7726721 -2.167056 7.712400 0.5251000
## 5-4  1.4437247 -3.496003 6.383453 0.9258057
par(mfrow = c(2, 2))

# Diagnostic panel for the transformed model.
plot(mod_denim_trans) # panel 4 (residuals vs leverage) is worth attention


Butterfat (two categorical variables)

# Additive and interaction models for butterfat content.
mod_bf <- lm(Butterfat ~ ., data = butterfat)
mod_bf_int <- lm(Butterfat ~ Breed * Age, data = butterfat)
# with() avoids `$`-qualified arguments and keeps the plot's factor and
# trace labels readable (Breed/Age instead of butterfat$Breed/butterfat$Age).
with(butterfat, interaction.plot(Breed, Age, Butterfat))

summary(mod_bf)
## 
## Call:
## lm(formula = Butterfat ~ ., data = butterfat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0202 -0.2373 -0.0640  0.2617  1.2098 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            4.00770    0.10135  39.541  < 2e-16 ***
## BreedCanadian          0.37850    0.13085   2.893  0.00475 ** 
## BreedGuernsey          0.89000    0.13085   6.802 9.48e-10 ***
## BreedHolstein-Fresian -0.39050    0.13085  -2.984  0.00362 ** 
## BreedJersey            1.23250    0.13085   9.419 3.16e-15 ***
## AgeMature              0.10460    0.08276   1.264  0.20937    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4138 on 94 degrees of freedom
## Multiple R-squared:  0.6825, Adjusted R-squared:  0.6656 
## F-statistic: 40.41 on 5 and 94 DF,  p-value: < 2.2e-16
# Interaction-model summary; no Breed:Age term is individually significant.
summary(mod_bf_int)
## 
## Call:
## lm(formula = Butterfat ~ Breed * Age, data = butterfat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0190 -0.2720 -0.0430  0.2372  1.3170 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       3.9660     0.1316  30.143  < 2e-16 ***
## BreedCanadian                     0.5220     0.1861   2.805  0.00616 ** 
## BreedGuernsey                     0.9330     0.1861   5.014 2.65e-06 ***
## BreedHolstein-Fresian            -0.3030     0.1861  -1.628  0.10693    
## BreedJersey                       1.1670     0.1861   6.272 1.22e-08 ***
## AgeMature                         0.1880     0.1861   1.010  0.31503    
## BreedCanadian:AgeMature          -0.2870     0.2631  -1.091  0.27834    
## BreedGuernsey:AgeMature          -0.0860     0.2631  -0.327  0.74457    
## BreedHolstein-Fresian:AgeMature  -0.1750     0.2631  -0.665  0.50773    
## BreedJersey:AgeMature             0.1310     0.2631   0.498  0.61982    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4161 on 90 degrees of freedom
## Multiple R-squared:  0.6926, Adjusted R-squared:  0.6619 
## F-statistic: 22.53 on 9 and 90 DF,  p-value: < 2.2e-16
# Nested-model F-test: does adding the Breed:Age interaction improve fit?
anova(mod_bf, mod_bf_int)
## Analysis of Variance Table
## 
## Model 1: Butterfat ~ Breed + Age
## Model 2: Butterfat ~ Breed * Age
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     94 16.094                           
## 2     90 15.580  4   0.51387 0.7421 0.5658
# Interaction not significant. Index the p-value by column name ("Pr(>F)")
# rather than position 6, which is fragile to anova()'s column layout.
anova(mod_bf, mod_bf_int)[2, "Pr(>F)"] > 0.05
## [1] TRUE
# Diagnostics for the additive model, then a Box-Cox profile to pick a
# power transform for Butterfat.
par(mfrow = c(2, 2))
plot(mod_bf)

boxcox(mod_bf)

# transform mod_bf
# Power transform of the response; exponent -1.4 presumably read off the
# Box-Cox plot above — TODO confirm against that plot.
mod_bf_trans <- lm((Butterfat) ^ (-1.4) ~., data = butterfat)
boxcox(mod_bf_trans)

par(mfrow = c(2, 2))
plot(mod_bf_trans)

Examine Leverages of the transformed model

# Leverages; all equal (0.06), consistent with a balanced design.
hatvalues(mod_bf_trans)
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16 
## 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 
##   17   18   19   20   21   22   23   24   25   26   27   28   29   30   31   32 
## 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 
##   33   34   35   36   37   38   39   40   41   42   43   44   45   46   47   48 
## 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 
##   49   50   51   52   53   54   55   56   57   58   59   60   61   62   63   64 
## 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 
##   65   66   67   68   69   70   71   72   73   74   75   76   77   78   79   80 
## 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 
##   81   82   83   84   85   86   87   88   89   90   91   92   93   94   95   96 
## 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 
##   97   98   99  100 
## 0.06 0.06 0.06 0.06
# Constant leverages make this plot degenerate, hence the EPS warning below.
plot(hatvalues(mod_bf_trans), type="h")
## Warning in plot.window(...): relative range of values ( 0 * EPS) is small (axis
## 2)

Examine internally Studentized residuals of the transformed model

# Internally studentized residuals of the transformed model.
rstandard(mod_bf_trans)
##            1            2            3            4            5            6 
##  1.302175124 -0.038684857  1.176756932  0.843010892 -0.059132740 -0.214583261 
##            7            8            9           10           11           12 
## -0.607305220  0.216601983 -0.092888120 -0.838010387 -1.105496473 -1.198459522 
##           13           14           15           16           17           18 
## -0.545540009  1.137560855  0.008974190  0.367430174 -1.020923831 -0.385358497 
##           19           20           21           22           23           24 
## -0.905989144  1.959861910  1.738937095 -1.495282498 -0.033796346  0.224513753 
##           25           26           27           28           29           30 
##  1.198241419  0.803309241  0.219953303  1.224316878 -0.006208173 -1.705575476 
##           31           32           33           34           35           36 
##  0.486532361 -2.078760161  0.077454061  1.152041151 -0.430498446  0.194061984 
##           37           38           39           40           41           42 
## -0.982070298 -0.823597445  0.162487176  0.073940420  1.007682878 -0.733936195 
##           43           44           45           46           47           48 
## -1.413229616 -0.454474406  0.749448915  0.093751336  0.552280746  2.829679897 
##           49           50           51           52           53           54 
## -0.630240456  0.406924472 -0.666971410  0.822372580  0.876878115 -0.454474406 
##           55           56           57           58           59           60 
##  0.293918731  0.688775444 -0.015161843 -1.103095821 -0.828222845 -2.021906115 
##           61           62           63           64           65           66 
##  1.365837893  0.317634100 -0.612931689 -1.366439777 -2.623365192 -0.365173950 
##           67           68           69           70           71           72 
##  1.915781005 -1.291943155  0.468040966  0.365631301 -0.451440051 -0.189649999 
##           73           74           75           76           77           78 
##  0.468040966  1.180235981 -0.115875664 -1.329304913  0.421319025  0.317634100 
##           79           80           81           82           83           84 
##  0.610104477  0.915864576  1.023007453 -1.963042444  0.218568783  1.509555514 
##           85           86           87           88           89           90 
##  0.104266735 -0.969700347 -0.203148991  0.800634831  0.218568783 -0.169371739 
##           91           92           93           94           95           96 
## -0.753195015  0.004082009  0.085520888  0.824236537  0.218568783  2.301425234 
##           97           98           99          100 
## -1.083112579  0.616015843 -1.782602653 -1.000277627
# Spike plot of the standardized residuals listed above.
plot(rstandard(mod_bf_trans), type="h")

Examine mean-shift t-statistics of the transformed model

# Externally studentized residuals (mean-shift outlier t-statistics).
rstudent(mod_bf_trans)
##            1            2            3            4            5            6 
##  1.307072894 -0.038478843  1.179198734  0.841702597 -0.058818457 -0.213491104 
##            7            8            9           10           11           12 
## -0.605254796  0.215500551 -0.092396954 -0.836672158 -1.106819034 -1.201280711 
##           13           14           15           16           17           18 
## -0.543491500  1.139363484  0.008926331  0.365733264 -1.021156008 -0.383606367 
##           19           20           21           22           23           24 
## -0.905117616  1.990501664  1.758173355 -1.505317976 -0.033616302  0.223376237 
##           25           26           27           28           29           30 
##  1.201058704  0.801781731  0.218836532  1.227614427 -0.006175064 -1.723354620 
##           31           32           33           34           35           36 
##  0.484547989 -2.116903550  0.077043429  1.154073108 -0.428625181  0.193065658 
##           37           38           39           40           41           42 
## -0.981882713 -0.822176717  0.161643274  0.073548207  1.007766454 -0.732122551 
##           43           44           45           46           47           48 
## -1.420867970 -0.452547992  0.747688987  0.093255685  0.550228652  2.942709942 
##           49           50           51           52           53           54 
## -0.628207821  0.405111166 -0.664989587  0.820945097  0.875790716 -0.452547992 
##           55           56           57           58           59           60 
##  0.292485584  0.686837352 -0.015080997 -1.104383954 -0.826827985 -2.056335073 
##           61           62           63           64           65           66 
##  1.372238251  0.316109727 -0.610884666 -1.372855207 -2.710481140 -0.363484260 
##           67           68           69           70           71           72 
##  1.943890751 -1.296616027  0.466088144  0.363940142 -0.449519912 -0.188674625 
##           73           74           75           76           77           78 
##  0.466088144  1.182737362 -0.115265888 -1.334821023  0.419468223  0.316109727 
##           79           80           81           82           83           84 
##  0.608055667  0.915071896  1.023263544 -1.993869890  0.217458340  1.520042040 
##           85           86           87           88           89           90 
##  0.103716640 -0.969389351 -0.202109894  0.799094049  0.217458340 -0.168494129 
##           91           92           93           94           95           96 
## -0.751448926  0.004060238  0.085068083  0.822819349  0.217458340  2.356503608 
##           97           98           99          100 
## -1.084122174  0.613970948 -1.803847041 -1.000280614
# Bonferroni-corrected critical value for the outlier t-test. Spell out
# `lower.tail` rather than relying on partial argument matching (`lower`).
critval <- qt(0.05/(2*nobs(mod_bf_trans)), df=df.residual(mod_bf_trans)-1, lower.tail=FALSE)
# No studentized residual exceeds the critical value -> no declared outliers.
which(abs(rstudent(mod_bf_trans)) > critval)
## named integer(0)

Examine Cook’s distances to check influences

# Cook's distances for the transformed butterfat model.
cooks.distance(mod_bf_trans)
##            1            2            3            4            5            6 
## 1.803894e-02 1.592041e-05 1.473146e-02 7.560291e-03 3.719873e-05 4.898508e-04 
##            7            8            9           10           11           12 
## 3.923613e-03 4.991108e-04 9.178939e-05 7.470866e-03 1.300130e-02 1.527984e-02 
##           13           14           15           16           17           18 
## 3.166105e-03 1.376643e-02 8.567668e-07 1.436223e-03 1.108814e-02 1.579800e-03 
##           19           20           21           22           23           24 
## 8.732089e-03 4.086233e-02 3.216917e-02 2.378585e-02 1.215099e-05 5.362386e-04 
##           25           26           27           28           29           30 
## 1.527428e-02 6.864955e-03 5.146751e-04 1.594630e-02 4.100150e-07 3.094668e-02 
##           31           32           33           34           35           36 
## 2.518231e-03 4.597068e-02 6.382055e-05 1.411914e-02 1.971584e-03 4.006389e-04 
##           37           38           39           40           41           42 
## 1.026023e-02 7.216093e-03 2.808732e-04 5.816155e-05 1.080239e-02 5.730450e-03 
##           43           44           45           46           47           48 
## 2.124700e-02 2.197308e-03 5.975252e-03 9.350333e-05 3.244830e-03 8.518179e-02 
##           49           50           51           52           53           54 
## 4.225564e-03 1.761569e-03 4.732456e-03 7.194645e-03 8.179949e-03 2.197308e-03 
##           55           56           57           58           59           60 
## 9.190236e-04 5.046932e-03 2.445548e-06 1.294490e-02 7.297373e-03 4.349047e-02 
##           61           62           63           64           65           66 
## 1.984588e-02 1.073313e-03 3.996652e-03 1.986338e-02 7.321324e-02 1.418638e-03 
##           67           68           69           70           71           72 
## 3.904486e-02 1.775657e-02 2.330450e-03 1.422194e-03 2.168065e-03 3.826290e-04 
##           73           74           75           76           77           78 
## 2.330450e-03 1.481869e-02 1.428422e-04 1.879842e-02 1.888401e-03 1.073313e-03 
##           79           80           81           82           83           84 
## 3.959867e-03 8.923489e-03 1.113345e-02 4.099506e-02 5.082161e-04 2.424210e-02 
##           85           86           87           88           89           90 
## 1.156548e-04 1.000339e-02 4.390374e-04 6.819321e-03 5.082161e-04 3.051786e-04 
##           91           92           93           94           95           96 
## 6.035135e-03 1.772638e-07 7.780662e-05 7.227296e-03 5.082161e-04 5.634636e-02 
##           97           98           99          100 
## 1.248014e-02 4.036974e-03 3.380502e-02 1.064421e-02
# No observation exceeds the conventional Cook's distance cutoff of 1
which(cooks.distance(mod_bf_trans) >= 1)
## named integer(0)
# Spike plot of Cook's distances to spot relatively influential points
plot(cooks.distance(mod_bf_trans), type="h")

TukeyHSD(aov(Butterfat ~ Breed, data = butterfat))
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Butterfat ~ Breed, data = butterfat)
## 
## $Breed
##                              diff         lwr         upr     p adj
## Canadian-Ayrshire          0.3785  0.01348598  0.74351402 0.0381804
## Guernsey-Ayrshire          0.8900  0.52498598  1.25501402 0.0000000
## Holstein-Fresian-Ayrshire -0.3905 -0.75551402 -0.02548598 0.0297910
## Jersey-Ayrshire            1.2325  0.86748598  1.59751402 0.0000000
## Guernsey-Canadian          0.5115  0.14648598  0.87651402 0.0016669
## Holstein-Fresian-Canadian -0.7690 -1.13401402 -0.40398598 0.0000007
## Jersey-Canadian            0.8540  0.48898598  1.21901402 0.0000000
## Holstein-Fresian-Guernsey -1.2805 -1.64551402 -0.91548598 0.0000000
## Jersey-Guernsey            0.3425 -0.02251402  0.70751402 0.0767002
## Jersey-Holstein-Fresian    1.6230  1.25798598  1.98801402 0.0000000
with(butterfat, tapply(Butterfat, Breed, mean))
##         Ayrshire         Canadian         Guernsey Holstein-Fresian 
##           4.0600           4.4385           4.9500           3.6695 
##           Jersey 
##           5.2925
TukeyHSD(aov(Butterfat ~ Age, data = butterfat))
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Butterfat ~ Age, data = butterfat)
## 
## $Age
##                diff        lwr       upr     p adj
## Mature-2year 0.1046 -0.1800704 0.3892704 0.4676321
with(butterfat, tapply(Butterfat, Age, mean))
##  2year Mature 
## 4.4298 4.5344

Three categorical variables

eggs
##     Fat Lab Technician Sample
## 1  0.62   I        one      G
## 2  0.55   I        one      G
## 3  0.34   I        one      H
## 4  0.24   I        one      H
## 5  0.80   I        two      G
## 6  0.68   I        two      G
## 7  0.76   I        two      H
## 8  0.65   I        two      H
## 9  0.30  II        one      G
## 10 0.40  II        one      G
## 11 0.33  II        one      H
## 12 0.43  II        one      H
## 13 0.39  II        two      G
## 14 0.40  II        two      G
## 15 0.29  II        two      H
## 16 0.18  II        two      H
## 17 0.46 III        one      G
## 18 0.38 III        one      G
## 19 0.27 III        one      H
## 20 0.37 III        one      H
## 21 0.37 III        two      G
## 22 0.42 III        two      G
## 23 0.45 III        two      H
## 24 0.54 III        two      H
## 25 0.18  IV        one      G
## 26 0.47  IV        one      G
## 27 0.53  IV        one      H
## 28 0.32  IV        one      H
## 29 0.40  IV        two      G
## 30 0.37  IV        two      G
## 31 0.31  IV        two      H
## 32 0.43  IV        two      H
## 33 0.35   V        one      G
## 34 0.39   V        one      G
## 35 0.37   V        one      H
## 36 0.33   V        one      H
## 37 0.42   V        two      G
## 38 0.36   V        two      G
## 39 0.20   V        two      H
## 40 0.41   V        two      H
## 41 0.37  VI        one      G
## 42 0.43  VI        one      G
## 43 0.28  VI        one      H
## 44 0.36  VI        one      H
## 45 0.18  VI        two      G
## 46 0.20  VI        two      G
## 47 0.26  VI        two      H
## 48 0.06  VI        two      H
# Additive model: fat content regressed on all three factors.
# (Written out explicitly; equivalent to the shorthand `Fat ~ .`,
# since the remaining columns are Lab, Technician, and Sample.)
mod_eggs <- lm(Fat ~ Lab + Technician + Sample, data = eggs)
summary(mod_eggs)
## 
## Call:
## lm(formula = Fat ~ ., data = eggs)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.30583 -0.04656  0.01312  0.06656  0.19500 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    0.59500    0.04773  12.467 2.33e-15 ***
## LabII         -0.24000    0.05845  -4.106 0.000193 ***
## LabIII        -0.17250    0.05845  -2.951 0.005273 ** 
## LabIV         -0.20375    0.05845  -3.486 0.001206 ** 
## LabV          -0.22625    0.05845  -3.871 0.000392 ***
## LabVI         -0.31250    0.05845  -5.346 3.91e-06 ***
## Techniciantwo  0.01917    0.03375   0.568 0.573244    
## SampleH       -0.04917    0.03375  -1.457 0.152947    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1169 on 40 degrees of freedom
## Multiple R-squared:  0.4657, Adjusted R-squared:  0.3722 
## F-statistic:  4.98 on 7 and 40 DF,  p-value: 0.0004051
# Standard 2x2 panel of lm diagnostic plots for the additive model
par(mfrow = c(2, 2))
plot(mod_eggs) # plot 4 worth paying attention (separated into groups, but seems to have some violations)

#test interaction 
# Partial F-test for a Lab x Technician interaction: compare the full
# interaction model against the additive (main-effects-only) model.
mod_eggs1 <- lm(Fat ~ Lab * Technician, data = eggs)
mod_eggsa <- update(mod_eggs1, . ~ . - Lab:Technician)
anova(mod_eggs1, mod_eggsa) # small p-value: Lab and Technician interact
## Analysis of Variance Table
## 
## Model 1: Fat ~ Lab * Technician
## Model 2: Fat ~ Lab + Technician
##   Res.Df     RSS Df Sum of Sq      F    Pr(>F)    
## 1     36 0.33260                                  
## 2     41 0.57567 -5  -0.24307 5.2618 0.0009971 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Partial F-test for a Lab x Sample interaction
mod_eggs2 <- lm(Fat ~ Lab * Sample, data = eggs)
mod_eggsb <- update(mod_eggs2, . ~ . - Lab:Sample)
anova(mod_eggs2, mod_eggsb) # large p-value: no evidence of interaction
## Analysis of Variance Table
## 
## Model 1: Fat ~ Lab * Sample
## Model 2: Fat ~ Lab + Sample
##   Res.Df     RSS Df Sum of Sq      F Pr(>F)
## 1     36 0.50200                           
## 2     41 0.55107 -5 -0.049067 0.7037 0.6243
# Partial F-test for a Technician x Sample interaction
mod_eggs3 <- lm(Fat ~ Technician * Sample, data = eggs)
mod_eggsc <- update(mod_eggs3, . ~ . - Technician:Sample)
anova(mod_eggs3, mod_eggsc) # large p-value: no evidence of interaction
## Analysis of Variance Table
## 
## Model 1: Fat ~ Technician * Sample
## Model 2: Fat ~ Technician + Sample
##   Res.Df     RSS Df  Sum of Sq      F Pr(>F)
## 1     44 0.98805                            
## 2     45 0.98968 -1 -0.0016333 0.0727 0.7887
# Final model: all main effects plus the significant Lab:Technician
# interaction. The original formula added `Lab * Technician` on top of the
# main effects; since `Lab * Technician` already expands to the main effects
# plus the interaction, write just the interaction term explicitly.
mod_egg_int <- lm(Fat ~ Technician + Sample + Lab + Lab:Technician, data = eggs)
par(mfrow = c(2, 2))
plot(mod_egg_int) # Many issues for this one; worth researching why they happen and how we might address them

boxcox(mod_egg_int) #No transformation needed 

All three datasets contain only categorical predictors, which may impose some limitations.